From 4a86e17e5cb76a3e8d71aaf84bf19345e1a919fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?=
Date: Wed, 27 Jul 2016 10:45:56 +0200
Subject: [PATCH 001/164] downgrading log level to avoid spam

---
 src/lib/conf_handling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/conf_handling.py b/src/lib/conf_handling.py
index 424b33ffd..b3ddd725a 100644
--- a/src/lib/conf_handling.py
+++ b/src/lib/conf_handling.py
@@ -140,7 +140,7 @@ def _get_fd(self, mode):
         for path in self.paths:
             try:
                 fd = open(path, mode)
-                logger.warn('will use conf from %r', path)
+                logger.info('will use conf from %r', path)
                 return fd
             except PermissionError:
                 if os.path.exists(path):

From f1a1cb905e1dd1ce5770feaecedcfa96ed350596 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3%A7ois=20Schmidts?=
Date: Wed, 27 Jul 2016 10:46:42 +0200
Subject: [PATCH 002/164] reset_feeds: only resetting active feeds

---
 src/manager.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/manager.py b/src/manager.py
index 99e4f34b3..99df12f03 100755
--- a/src/manager.py
+++ b/src/manager.py
@@ -45,12 +45,19 @@ def fetch(limit=100, retreive_all=False):
 
 @manager.command
 def reset_feeds():
-    contr = FeedController()
-    step = timedelta(seconds=3600 / contr.read().count())
+    from web.models import User
+    fcontr = FeedController(ignore_context=True)
     now = datetime.utcnow()
-    for i, feed in enumerate(contr.read()
-                             .order_by(contr._db_cls.last_retrieved)):
-        contr.update({'id': feed.id},
+    last_conn_max = now - timedelta(days=30)
+
+    feeds = list(fcontr.read().join(User).filter(User.is_active == True,
+                        User.last_connection >= last_conn_max)\
+                      .with_entities(fcontr._db_cls.user_id)\
+                      .distinct())
+
+    step = timedelta(seconds=3600 / fcontr.read().count())
+    for i, feed in enumerate(feeds):
+        fcontr.update({'id': feed[0]},
                      {'etag': '', 'last_modified': '',
                       'last_retrieved': now - i * step})
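A note on the arithmetic behind reset_feeds: spreading the feeds' last_retrieved
dates over one hour keeps the crawler from refreshing everything at once. A
minimal, self-contained sketch of that scheduling (the feed count is made up):

    from datetime import datetime, timedelta

    feed_ids = list(range(120))  # stand-in for the ids queried above
    now = datetime.utcnow()
    step = timedelta(seconds=3600 / len(feed_ids))
    # each feed lands on its own slot within the hour
    schedule = {fid: now - i * step for i, fid in enumerate(feed_ids)}
    assert schedule[0] - schedule[119] == 119 * step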
From 789dbc33f8c9a404b94140809e39548f3f737f24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?=
Date: Fri, 29 Jul 2016 16:32:27 +0200
Subject: [PATCH 003/164] setting global timeout for every crawler

---
 src/crawler/http_crawler.py      | 3 ++-
 src/crawler/lib/article_utils.py | 3 ++-
 src/lib/conf_handling.py         | 1 +
 src/tests/base.py                | 1 +
 src/web/controllers/icon.py      | 7 ++++++-
 src/web/lib/feed_utils.py        | 2 +-
 src/web/lib/utils.py             | 4 +++-
 7 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py
index 6623cbd38..9edc1eb55 100644
--- a/src/crawler/http_crawler.py
+++ b/src/crawler/http_crawler.py
@@ -49,7 +49,7 @@ def query_jarr(self, method, urn, data=None):
             data = {}
         method = getattr(self.session, method)
         future = method("%s%s/%s" % (self.url, conf.API_ROOT.strip('/'), urn),
-                        auth=self.auth,
+                        auth=self.auth, timeout=conf.CRAWLER_TIMEOUT,
                         data=json.dumps(data, default=default_handler),
                         headers={'Content-Type': 'application/json',
                                  'User-Agent': conf.CRAWLER_USER_AGENT})
@@ -259,6 +259,7 @@ def callback(self, response):
             logger.debug('%r %r - fetching resources',
                          feed['id'], feed['title'])
             future = self.session.get(feed['link'],
+                                      timeout=conf.CRAWLER_TIMEOUT,
                                      headers=self.prepare_headers(feed))
             self._futures.append(future)

diff --git a/src/crawler/lib/article_utils.py b/src/crawler/lib/article_utils.py
index 8958b8f00..b9a921186 100644
--- a/src/crawler/lib/article_utils.py
+++ b/src/crawler/lib/article_utils.py
@@ -73,7 +73,8 @@ def get_article_link(entry):
     if conf.CRAWLER_RESOLV and article_link:
         try:
             # resolves URL behind proxies (like feedproxy.google.com)
-            response = requests.get(article_link, verify=False, timeout=5.0)
+            response = requests.get(article_link, verify=False,
+                                    timeout=conf.CRAWLER_TIMEOUT)
             article_link = response.url
         except Exception as error:
             logger.warning("Unable to get the real URL of %s. Error: %s",

diff --git a/src/lib/conf_handling.py b/src/lib/conf_handling.py
index b3ddd725a..13f594781 100644
--- a/src/lib/conf_handling.py
+++ b/src/lib/conf_handling.py
@@ -48,6 +48,7 @@
          'choices': ABS_CHOICES, 'edit': False},
         {'key': 'USER_AGENT', 'edit': False,
          'default': 'https://github.com/jaesivsm/JARR'},
+        {'key': 'TIMEOUT', 'edit': False, 'default': 30},
     ]},
     {'prefix': 'PLUGINS', 'options': [
         {'key': 'READABILITY_KEY', 'default': '',

diff --git a/src/tests/base.py b/src/tests/base.py
index 7807001a8..75a04e5ba 100644
--- a/src/tests/base.py
+++ b/src/tests/base.py
@@ -75,6 +75,7 @@ def _api(self, method_name, *urn_parts, **kwargs):
             del kwargs['user']
 
         urn = path.join(conf.API_ROOT, *map(str, urn_parts))
+        kwargs.pop('timeout', None)  # removing timeout non supported by flask
         resp = method(urn, **kwargs)
         if resp.data and resp.content_type == 'application/json':
             resp.json = lambda *a, **kw: json.loads(resp.data.decode('utf8'))

diff --git a/src/web/controllers/icon.py b/src/web/controllers/icon.py
index 07c4a4ef1..6a6880164 100644
--- a/src/web/controllers/icon.py
+++ b/src/web/controllers/icon.py
@@ -1,5 +1,6 @@
 import base64
 import requests
+from bootstrap import conf
 from web.models import Icon
 from .abstract import AbstractController
 
@@ -10,7 +11,11 @@ class IconController(AbstractController):
 
     def _build_from_url(self, attrs):
         if 'url' in attrs and 'content' not in attrs:
-            resp = requests.get(attrs['url'], verify=False)
+            try:
+                resp = requests.get(attrs['url'], verify=False,
+                                    timeout=conf.CRAWLER_TIMEOUT)
+            except Exception:
+                return attrs
             attrs.update({'url': resp.url,
                           'mimetype': resp.headers.get('content-type', None),
                           'content': base64.b64encode(resp.content).decode('utf8')})

diff --git a/src/web/lib/feed_utils.py b/src/web/lib/feed_utils.py
index fab691930..18b369819 100644
--- a/src/web/lib/feed_utils.py
+++ b/src/web/lib/feed_utils.py
@@ -33,7 +33,7 @@ def metawrapper(*args, **kwargs):
 @escape_keys('title', 'description')
 def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
     requests_kwargs = {'headers': {'User-Agent': conf.CRAWLER_USER_AGENT},
-                       'verify': False}
+                       'verify': False, 'timeout': conf.CRAWLER_TIMEOUT}
     if url is None and fp_parsed is not None:
         url = fp_parsed.get('url')
     if url is not None and fp_parsed is None:

diff --git a/src/web/lib/utils.py b/src/web/lib/utils.py
index 3f6764db3..0f318aa2d 100644
--- a/src/web/lib/utils.py
+++ b/src/web/lib/utils.py
@@ -4,6 +4,7 @@
 import logging
 import requests
 from hashlib import md5
+from bootstrap import conf
 from flask import request, url_for
 
 logger = logging.getLogger(__name__)
@@ -49,7 +50,8 @@ def try_get_icon_url(url, *splits):
     response = None
     # if html in content-type, we assume it's a fancy 404 page
     try:
-        response = requests.get(rb_url, verify=False, timeout=10)
+        response = requests.get(rb_url, verify=False,
+                                timeout=conf.CRAWLER_TIMEOUT)
         content_type = response.headers.get('content-type', '')
     except Exception:
         pass
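The idea patch 003 starts (and patch 005 finishes) is a single configurable
timeout for every outbound request. A hedged sketch of why it matters, with a
hypothetical CRAWLER_TIMEOUT standing in for the new TIMEOUT key (default 30):

    import requests

    CRAWLER_TIMEOUT = 30  # assumption: mirrors the conf key added above

    def fetch(url):
        # without a timeout, a stalled server can hang a crawler worker
        # forever; with one, both connect and read phases are bounded
        return requests.get(url, timeout=CRAWLER_TIMEOUT)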
From 378700b9490af1884517641c803d1d2be32db7c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?=
Date: Wed, 17 Aug 2016 15:27:29 +0200
Subject: [PATCH 004/164] fixing articles with no title

---
 src/crawler/lib/article_utils.py | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/crawler/lib/article_utils.py b/src/crawler/lib/article_utils.py
index b9a921186..b78face2f 100644
--- a/src/crawler/lib/article_utils.py
+++ b/src/crawler/lib/article_utils.py
@@ -3,6 +3,7 @@
 import requests
 import dateutil.parser
 from datetime import datetime, timezone
+from bs4 import BeautifulSoup, SoupStrainer
 
 from bootstrap import conf
 from web.lib.utils import to_hash
@@ -47,14 +48,13 @@ def construct_article(entry, feed):
             break
 
     content = get_article_content(entry)
-    link = get_article_link(entry)
+    link, title = get_article_details(entry)
     content = clean_urls(content, link)
 
     return {'feed_id': feed['id'],
             'user_id': feed['user_id'],
             'entry_id': extract_id(entry).get('entry_id', None),
-            'link': link, 'content': content,
-            'title': html.unescape(entry.get('title', 'No title')),
+            'link': link, 'content': content, 'title': title,
             'readed': False, 'like': False,
             'retrieved_date': now, 'date': date or now}
 
@@ -68,15 +68,23 @@ def get_article_content(entry):
     return content
 
 
-def get_article_link(entry):
+def get_article_details(entry):
     article_link = entry.get('link')
-    if conf.CRAWLER_RESOLV and article_link:
+    article_title = html.unescape(entry.get('title', ''))
+    if conf.CRAWLER_RESOLV and article_link or not article_title:
         try:
             # resolves URL behind proxies (like feedproxy.google.com)
-            response = requests.get(article_link, verify=False,
-                                    timeout=conf.CRAWLER_TIMEOUT)
-            article_link = response.url
+            response = requests.get(article_link, verify=False, timeout=5.0)
         except Exception as error:
-            logger.warning("Unable to get the real URL of %s. Error: %s",
-                           article_link, error)
-    return article_link
+            logger.warning("Unable to get the real URL of %s. Won't fix link "
+                           "or title. Error: %s", article_link, error)
+            return article_link, article_title
+        article_link = response.url
+        if not article_title:
+            bs_parsed = BeautifulSoup(response.content, 'html.parser',
+                                      parse_only=SoupStrainer('head'))
+            try:
+                article_title = bs_parsed.find_all('title')[0].text
+            except IndexError:  # no title
+                pass
+    return article_link, article_title
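The title-recovery trick above, isolated into one runnable snippet: parsing
only the <head> with a SoupStrainer keeps fetching a title from a large page
cheap. The HTML sample is made up:

    from bs4 import BeautifulSoup, SoupStrainer

    page = b"<html><head><title>Hello</title></head><body>...</body></html>"
    head = BeautifulSoup(page, 'html.parser', parse_only=SoupStrainer('head'))
    print(head.find_all('title')[0].text)  # -> Hello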
Error: %s", article_link, error) + return article_link, article_title + article_link = response.url + if not article_title: + bs_parsed = BeautifulSoup(response.content, 'html.parser', + parse_only=SoupStrainer('head')) + try: + article_title = bs_parsed.find_all('title')[0].text + except IndexError: # no title + pass + return article_link, article_title From 532684ad26c5de03fe3b7774cf3227cf7ee753ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Wed, 17 Aug 2016 15:57:54 +0200 Subject: [PATCH 005/164] refact, not using requests.get, always applying user-agent and other options --- requirements.txt | 2 +- src/crawler/http_crawler.py | 2 +- src/crawler/lib/article_utils.py | 5 ++--- src/{web => }/lib/utils.py | 9 +++++++-- src/web/controllers/feed.py | 2 +- src/web/controllers/icon.py | 6 ++---- src/web/lib/feed_utils.py | 10 +++------- src/web/lib/view_utils.py | 2 +- src/web/utils.py | 2 +- src/web/views/admin.py | 2 +- src/web/views/common.py | 2 +- src/web/views/home.py | 2 +- 12 files changed, 22 insertions(+), 24 deletions(-) rename src/{web => }/lib/utils.py (90%) diff --git a/requirements.txt b/requirements.txt index f8bfa4494..42251c87a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,7 +17,7 @@ opml==0.5 python-dateutil==2.4.2 python-postmark==0.4.7 rauth==0.7.2 -requests==2.10.0 +requests==2.11.0 requests-futures==0.9.7 SQLAlchemy==1.0.11 WTForms==2.1 diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py index 9edc1eb55..c3b4154b3 100644 --- a/src/crawler/http_crawler.py +++ b/src/crawler/http_crawler.py @@ -22,7 +22,7 @@ from datetime import datetime, timedelta from concurrent.futures import wait, ThreadPoolExecutor from requests_futures.sessions import FuturesSession -from web.lib.utils import default_handler, to_hash +from lib.utils import default_handler, to_hash from web.lib.feed_utils import construct_feed_from from crawler.lib.article_utils import extract_id, construct_article diff --git a/src/crawler/lib/article_utils.py b/src/crawler/lib/article_utils.py index b78face2f..ea1353fd4 100644 --- a/src/crawler/lib/article_utils.py +++ b/src/crawler/lib/article_utils.py @@ -1,12 +1,11 @@ import html import logging -import requests import dateutil.parser from datetime import datetime, timezone from bs4 import BeautifulSoup, SoupStrainer from bootstrap import conf -from web.lib.utils import to_hash +from lib.utils import to_hash, jarr_get from web.lib.article_cleaner import clean_urls logger = logging.getLogger(__name__) @@ -74,7 +73,7 @@ def get_article_details(entry): if conf.CRAWLER_RESOLV and article_link or not article_title: try: # resolves URL behind proxies (like feedproxy.google.com) - response = requests.get(article_link, verify=False, timeout=5.0) + response = jarr_get(article_link) except Exception as error: logger.warning("Unable to get the real URL of %s. Won't fix link " "or title. 
Error: %s", article_link, error) diff --git a/src/web/lib/utils.py b/src/lib/utils.py similarity index 90% rename from src/web/lib/utils.py rename to src/lib/utils.py index 0f318aa2d..90b06c592 100644 --- a/src/web/lib/utils.py +++ b/src/lib/utils.py @@ -50,8 +50,7 @@ def try_get_icon_url(url, *splits): response = None # if html in content-type, we assume it's a fancy 404 page try: - response = requests.get(rb_url, verify=False, - timeout=conf.CRAWLER_TIMEOUT) + response = jarr_get(rb_url) content_type = response.headers.get('content-type', '') except Exception: pass @@ -79,3 +78,9 @@ def clear_string(data): def redirect_url(default='home'): return request.args.get('next') or request.referrer or url_for(default) + + +def jarr_get(url): + return requests.get(url, verify=False, allow_redirects=True, + timeout=conf.CRAWLER_TIMEOUT, + headers={'User-Agent': conf.CRAWLER_USER_AGENT}) diff --git a/src/web/controllers/feed.py b/src/web/controllers/feed.py index 48da2266c..033bfa977 100644 --- a/src/web/controllers/feed.py +++ b/src/web/controllers/feed.py @@ -6,7 +6,7 @@ from .abstract import AbstractController from .icon import IconController from web.models import User, Feed -from web.lib.utils import clear_string +from lib.utils import clear_string logger = logging.getLogger(__name__) DEFAULT_LIMIT = 5 diff --git a/src/web/controllers/icon.py b/src/web/controllers/icon.py index 6a6880164..a9a56930f 100644 --- a/src/web/controllers/icon.py +++ b/src/web/controllers/icon.py @@ -1,6 +1,5 @@ import base64 -import requests -from bootstrap import conf +from lib.utils import jarr_get from web.models import Icon from .abstract import AbstractController @@ -12,8 +11,7 @@ class IconController(AbstractController): def _build_from_url(self, attrs): if 'url' in attrs and 'content' not in attrs: try: - resp = requests.get(attrs['url'], verify=False, - timemout=conf.CRAWLER_TIMEOUT) + resp = jarr_get(attrs['url']) except Exception: return attrs attrs.update({'url': resp.url, diff --git a/src/web/lib/feed_utils.py b/src/web/lib/feed_utils.py index 18b369819..5abeae239 100644 --- a/src/web/lib/feed_utils.py +++ b/src/web/lib/feed_utils.py @@ -1,12 +1,10 @@ import html import urllib import logging -import requests import feedparser from bs4 import BeautifulSoup, SoupStrainer -from bootstrap import conf -from web.lib.utils import try_keys, try_get_icon_url, rebuild_url +from lib.utils import try_keys, try_get_icon_url, rebuild_url, jarr_get logger = logging.getLogger(__name__) logging.captureWarnings(True) @@ -32,13 +30,11 @@ def metawrapper(*args, **kwargs): @escape_keys('title', 'description') def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): - requests_kwargs = {'headers': {'User-Agent': conf.CRAWLER_USER_AGENT}, - 'verify': False, 'timeout': conf.CRAWLER_TIMEOUT} if url is None and fp_parsed is not None: url = fp_parsed.get('url') if url is not None and fp_parsed is None: try: - response = requests.get(url, **requests_kwargs) + response = jarr_get(url) fp_parsed = feedparser.parse(response.content, request_headers=response.headers) except Exception as error: @@ -72,7 +68,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): return feed try: - response = requests.get(feed['site_link'], **requests_kwargs) + response = jarr_get(feed['site_link']) except Exception as error: logger.warn('failed to retreive %r: %r', feed['site_link'], error) return feed diff --git a/src/web/lib/view_utils.py b/src/web/lib/view_utils.py index d4c119da6..1d8c6aed1 100644 --- 
From 6fa2294469a67f478ea8b12f3887066409b498dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?=
Date: Tue, 6 Sep 2016 16:58:05 +0200
Subject: [PATCH 007/164] icon needs different delete method

---
 src/web/controllers/icon.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/web/controllers/icon.py b/src/web/controllers/icon.py
index a9a56930f..7a4bc9d4d 100644
--- a/src/web/controllers/icon.py
+++ b/src/web/controllers/icon.py
@@ -1,4 +1,5 @@
 import base64
+from bootstrap import db
 from lib.utils import jarr_get
 from web.models import Icon
 from .abstract import AbstractController
@@ -24,3 +25,9 @@ def create(self, **attrs):
 
     def update(self, filters, attrs):
         return super().update(filters, self._build_from_url(attrs))
+
+    def delete(self, url):
+        obj = self.get(url=url)
+        db.session.delete(obj)
+        db.session.commit()
+        return obj

From 03d3f776b6969719ad21f0e597585c90a1a648db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?=
Date: Fri, 9 Sep 2016 15:59:38 +0200
Subject: [PATCH 008/164] fixing broken images from readability parsed content

close #51
---
 src/web/lib/article_cleaner.py | 9 +++++++--
 src/web/views/home.py          | 3 ++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/web/lib/article_cleaner.py b/src/web/lib/article_cleaner.py
index 4c3fbd2e3..9e4e9c09d 100644
--- a/src/web/lib/article_cleaner.py
+++ b/src/web/lib/article_cleaner.py
@@ -1,17 +1,22 @@
-from urllib.parse import urlparse, urlunparse, ParseResult
+from urllib.parse import unquote, urlparse, urlunparse, ParseResult
 from bs4 import BeautifulSoup
 
 from bootstrap import is_secure_served
 
 HTTPS_IFRAME_DOMAINS = ('vimeo.com', 'youtube.com', 'youtu.be')
 
 
-def clean_urls(article_content, article_link):
+def clean_urls(article_content, article_link, fix_readability=False):
     parsed_article_url = urlparse(article_link)
     parsed_content = BeautifulSoup(article_content, 'html.parser')
 
     for img in parsed_content.find_all('img'):
         if 'src' not in img.attrs:
             continue
+        # bug reported to readability, fixing it here for now
+        if fix_readability:
+            splited_src = unquote(img.attrs['src']).split(', ')
+            if len(splited_src) > 1:
+                img.attrs['src'] = splited_src[0].split()[0]
         if is_secure_served() and 'srcset' in img.attrs:
             # removing active content when serving over https
             del img.attrs['srcset']

diff --git a/src/web/views/home.py b/src/web/views/home.py
index d6674db96..60ae79507 100644
--- a/src/web/views/home.py
+++ b/src/web/views/home.py
@@ -163,7 +163,8 @@ def get_article(article_id, parse=False):
             article['readability_parsed'] = False
         else:
             article['readability_parsed'] = True
-            article['content'] = clean_urls(new_content, article['link'])
+            article['content'] = clean_urls(new_content, article['link'],
+                                            fix_readability=True)
         new_attr = {'readability_parsed': True, 'content': new_content}
         contr.update({'id': article['id']}, new_attr)
     return article
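The workaround from patch 008, isolated: readability sometimes collapses a
srcset-like value into src ("url1 640w, url2 1280w"); keeping only the first
URL restores a loadable image. The sample value is made up:

    from urllib.parse import unquote

    src = unquote('http://img.example/a.png%20640w,'
                  '%20http://img.example/b.png%201280w')
    parts = src.split(', ')
    if len(parts) > 1:
        src = parts[0].split()[0]
    print(src)  # -> http://img.example/a.png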
From 018a852c2d50806b5443577981efc321cabc3767 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?=
Date: Sun, 2 Oct 2016 12:03:18 +0200
Subject: [PATCH 009/164] fixing broken feed construction

---
 src/web/lib/feed_utils.py | 50 +++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/src/web/lib/feed_utils.py b/src/web/lib/feed_utils.py
index 5abeae239..5033c1764 100644
--- a/src/web/lib/feed_utils.py
+++ b/src/web/lib/feed_utils.py
@@ -2,14 +2,15 @@
 import urllib
 import logging
 import feedparser
+from bootstrap import conf
 from bs4 import BeautifulSoup, SoupStrainer
 
-from lib.utils import try_keys, try_get_icon_url, rebuild_url, jarr_get
+from lib.utils import try_get_icon_url, rebuild_url, jarr_get
 
 logger = logging.getLogger(__name__)
 logging.captureWarnings(True)
-ACCEPTED_MIMETYPES = ('application/rss+xml', 'application/rdf+xml',
-                      'application/atom+xml', 'application/xml', 'text/xml')
+FEED_MIMETYPES = ('application/atom+xml', 'application/rss+xml',
+                  'application/rdf+xml', 'application/xml', 'text/xml')
 
 
 def is_parsing_ok(parsed_feed):
@@ -28,15 +29,22 @@ def metawrapper(*args, **kwargs):
     return wrapper
 
 
+def _browse_feedparser_feed(feed, check):
+    if feed.get('feed', {}).get('links') is None:
+        return
+    for link in feed['feed']['links']:
+        if check(link):
+            return link['href']
+
+
 @escape_keys('title', 'description')
 def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
     if url is None and fp_parsed is not None:
         url = fp_parsed.get('url')
     if url is not None and fp_parsed is None:
         try:
-            response = jarr_get(url)
-            fp_parsed = feedparser.parse(response.content,
-                                         request_headers=response.headers)
+            fp_parsed = feedparser.parse(url,
+                    request_headers={'User-Agent': conf.CRAWLER_USER_AGENT})
         except Exception as error:
             logger.warn('failed to retreive that url: %r', error)
             fp_parsed = {'bozo': True}
@@ -44,14 +52,30 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
     feed = feed or {}
     feed_split = urllib.parse.urlsplit(url)
     site_split = None
+    feed['site_link'] = url
+    feed['link'] = _browse_feedparser_feed(fp_parsed,
+            lambda link: link['type'] in FEED_MIMETYPES)
+
+    if not is_parsing_ok(fp_parsed) and feed.get('link'):
+        try:
+            fp_parsed = feedparser.parse(feed['link'],
+                    request_headers={'User-Agent': conf.CRAWLER_USER_AGENT})
+        except Exception as error:
+            logger.warn('failed to retreive that url: %r', error)
+            fp_parsed = {'bozo': True}
+        url = feed['link']
+
     if is_parsing_ok(fp_parsed):
         feed['link'] = url
-        feed['site_link'] = try_keys(fp_parsed['feed'], 'href', 'link')
-        feed['title'] = fp_parsed['feed'].get('title')
-        feed['description'] = try_keys(fp_parsed['feed'], 'subtitle', 'title')
-        feed['icon_url'] = try_keys(fp_parsed['feed'], 'icon')
-    else:
-        feed['site_link'] = url
+        feed['site_link'] = fp_parsed['feed'].get('link') \
+                or _browse_feedparser_feed(fp_parsed,
+                        lambda link: link['rel'] == 'alternate'
+                                and link['type'] == 'text/html')
+        feed['title'] = fp_parsed['feed'].get('title_detail', {}).get('value')
+        feed['description'] = fp_parsed['feed']\
+                .get('subtitle_detail', {}).get('value')
+        feed['icon_url'] = _browse_feedparser_feed(fp_parsed,
+                lambda link: 'icon' in link['rel'])
 
     if feed.get('site_link'):
         feed['site_link'] = rebuild_url(feed['site_link'], feed_split)
@@ -109,7 +133,7 @@ def wrapper(elem):
             del feed['icon_url']
 
     if not feed.get('link'):
-        for type_ in ACCEPTED_MIMETYPES:
+        for type_ in FEED_MIMETYPES:
             alternates = bs_parsed.find_all(check_keys(
                     rel=['alternate'], type=[type_]))
             if len(alternates) >= 1:
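A usage sketch of the _browse_feedparser_feed helper introduced in patch 009,
reimplemented standalone here (the XML sample is made up):

    import feedparser

    FEED_MIMETYPES = ('application/atom+xml', 'application/rss+xml',
                      'application/rdf+xml', 'application/xml', 'text/xml')

    def browse(parsed, check):
        # walk the parsed feed's <link> elements, return the first match
        for link in (parsed.get('feed') or {}).get('links') or []:
            if check(link):
                return link['href']

    xml = ('<feed xmlns="http://www.w3.org/2005/Atom"><title>t</title>'
           '<link rel="self" type="application/atom+xml"'
           ' href="http://site/feed.xml"/></feed>')
    parsed = feedparser.parse(xml)
    print(browse(parsed, lambda l: l.get('type') in FEED_MIMETYPES))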
From 5d8ac70352500fb90a523d89a93dc178b3960143 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?=
Date: Sun, 2 Oct 2016 18:41:43 +0200
Subject: [PATCH 010/164] big feature: clustering

close #14
---
 .travis.yml                                 |   3 +-
 install.py                                  |   8 +-
 package.json                                |  18 +-
 requirements.postgres.txt                   |   2 +-
 requirements.txt                            |  23 +--
 src/bootstrap.py                            |   4 +-
 src/crawler/classic_crawler.py              |   1 -
 src/crawler/http_crawler.py                 |  17 +-
 src/crawler/lib/article_utils.py            |   1 -
 src/lib/utils.py                            |   2 +-
 src/manager.py                              |   6 +-
 .../versions/9e3fecc9d031_oauth_linuxfr.py  |   2 +-
 .../versions/a7f62d50d366_clustering.py     | 141 +++++++++++++
 src/runserver.py                            |   4 +-
 src/scripts/probes.py                       |   2 +-
 src/tests/api/article_test.py               |  35 +---
 src/tests/api/category_test.py              |   6 +-
 src/tests/api/cluster_test.py               |  60 ++++++
 src/tests/api/feed_test.py                  |  18 +-
 src/tests/base.py                           |   2 +-
 src/tests/controllers/article_test.py       | 103 +++++-----
 src/tests/controllers/cluster_test.py       | 120 +++++++++++
 src/tests/crawler_test.py                   |  18 +-
 src/tests/fixtures/filler.py                |  44 ++--
 src/tests/ui_test.py                        |  40 +++-
 src/web/controllers/__init__.py             |   3 +-
 src/web/controllers/abstract.py             |  50 +++--
 src/web/controllers/article.py              |  31 +--
 src/web/controllers/category.py             |   5 -
 src/web/controllers/cluster.py              | 183 +++++++++++++++++
 src/web/controllers/feed.py                 |   6 +-
 src/web/controllers/icon.py                 |   3 +-
 src/web/controllers/user.py                 |   4 +-
 src/web/forms.py                            |   6 +-
 src/web/js/actions/MenuActions.js           |   2 +-
 src/web/js/actions/MiddlePanelActions.js    |  35 ++--
 src/web/js/actions/RightPanelActions.js     |  25 ++-
 src/web/js/components/MainApp.react.js      |   9 +-
 src/web/js/components/Menu.react.js         |  31 ++-
 src/web/js/components/MiddlePanel.react.js  | 163 ++++++++++-----
 src/web/js/components/Navbar.react.js       |  22 +-
 src/web/js/components/RightPanel.react.js   | 139 ++++++-------
 src/web/js/constants/JarrConstants.js       |   4 +-
 src/web/js/stores/MenuStore.js              | 131 ++++++------
 src/web/js/stores/MiddlePanelStore.js       | 121 +++++------
 src/web/js/stores/RightPanelStore.js        |  45 ++++-
 src/web/lib/feed_utils.py                   |  22 +-
 src/web/lib/view_utils.py                   |  21 ++
 src/web/models/__init__.py                  |  15 +-
 src/web/models/article.py                   |  61 +++---
 src/web/models/category.py                  |  22 +-
 src/web/models/cluster.py                   |  72 +++++++
 src/web/models/feed.py                      |  52 ++---
 src/web/models/icon.py                      |  11 +-
 src/web/models/relationships.py             |  13 ++
 src/web/models/right_mixin.py               |   5 +-
 src/web/models/user.py                      |  62 +++---
 src/web/static/css/one-page-app.css         |  18 +-
 src/web/static/js/articles.js               | 191 ------------------
 src/web/templates/categories.html           |   2 +-
 src/web/templates/feed_list.html            |   2 +-
 src/web/views/__init__.py                   |   5 +-
 src/web/views/admin.py                      |  13 +-
 src/web/views/api/__init__.py               |   4 +-
 src/web/views/api/article.py                |   2 +-
 src/web/views/api/category.py               |   2 +-
 src/web/views/api/cluster.py                |  26 +++
 src/web/views/api/common.py                 |  18 +-
 src/web/views/api/feed.py                   |   2 +-
 src/web/views/article.py                    |  23 +--
 src/web/views/category.py                   |   4 +-
 src/web/views/cluster.py                    |  15 ++
 src/web/views/common.py                     |   4 +-
 src/web/views/feed.py                       |  12 +-
 src/web/views/home.py                       | 120 +++++------
 src/web/views/icon.py                       |  11 +-
 src/web/views/session_mgmt.py               |  12 +-
 src/web/views/user.py                       |   6 +-
 src/web/views/views.py                      |   2 +-
 submodules/bootstrap                        |   2 +-
 80 files changed, 1553 insertions(+), 997 deletions(-)
 create mode 100644 src/migrations/versions/a7f62d50d366_clustering.py
 create mode 100644 src/tests/api/cluster_test.py
 create mode 100644 src/tests/controllers/cluster_test.py
 create mode 100644 src/web/controllers/cluster.py
 create mode 100644 src/web/models/cluster.py
 create mode 100644 src/web/models/relationships.py
 delete mode 100644 src/web/static/js/articles.js
 create mode 100644 src/web/views/api/cluster.py
 create mode 100644 src/web/views/cluster.py
diff --git a/.travis.yml b/.travis.yml
index ba9251be8..b4675d2c1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,7 +18,8 @@ install:
 
 script:
   - nosetests --with-coverage --cover-package=bootstrap,runserver,web,crawler,plugins src/tests/
-  - pep8 --ignore=E126,E127,E128,E302,E712,W503 src/web/ src/crawler/ src/plugins/ src/lib/
+  # ignoring aligning / line breaking non-sens
+  - pep8 --ignore=E126,E127,E128,W503 src/web/ src/crawler/ src/plugins/ src/lib/
 
 after_success:
   - coveralls

diff --git a/install.py b/install.py
index 8315c5f3f..a72032230 100755
--- a/install.py
+++ b/install.py
@@ -150,9 +150,11 @@ def install_python_deps(args):
             conf.reload()
         except Exception:
             pass
-    if not args.no_requirements:
-        install_postgres = 'postgres' in getattr(
-                conf, 'SQLALCHEMY_DATABASE_URI', '')
+    if args.no_requirements:
+        return
+
+    install_postgres = 'postgres' in getattr(
+            conf, 'SQLALCHEMY_DATABASE_URI', '')
 
     print('installing python dependencies...')
     base_cmd = ['install', '--quiet', '--upgrade', '-r']

diff --git a/package.json b/package.json
index 8d60be4c6..0a0303b75 100644
--- a/package.json
+++ b/package.json
@@ -17,22 +17,22 @@
   },
   "main": "src/web/js/app.js",
   "dependencies": {
-    "bootstrap": "^3.3.6",
-    "classnames": "^2.1.3",
+    "bootstrap": "^3.3.7",
+    "classnames": "^2.2.5",
     "flux": "^2.0.1",
-    "jquery": "^2.2.0",
+    "jquery": "^3.1.0",
     "keymirror": "~0.1.0",
-    "object-assign": "^1.0.0",
+    "object-assign": "^4.1.0",
     "react": "^0.14.6",
     "react-bootstrap": "^0.28.0",
     "react-dom": "^0.14.6"
   },
   "devDependencies": {
-    "browserify": "^6.2.0",
-    "envify": "^3.0.0",
-    "reactify": "^0.15.2",
-    "uglify-js": "~2.4.15",
-    "watchify": "^2.1.1"
+    "browserify": "^13.1.0",
+    "envify": "^3.4.1",
+    "reactify": "^1.1.1",
+    "uglify-js": "~2.7.3",
+    "watchify": "^3.7.0"
   },
   "scripts": {
     "start": "watchify -o src/web/static/js/bundle.min.js -v -d src/web/js/app.js",

diff --git a/requirements.postgres.txt b/requirements.postgres.txt
index 3d49e0822..59117830d 100644
--- a/requirements.postgres.txt
+++ b/requirements.postgres.txt
@@ -1 +1 @@
-psycopg2==2.6.1
+psycopg2==2.6.2

diff --git a/requirements.txt b/requirements.txt
index 42251c87a..a47e433b6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,23 +1,22 @@
-aiohttp==0.21.0
-alembic==0.8.4
-beautifulsoup4==4.4.1
+aiohttp==0.22.5
+alembic==0.8.7
+beautifulsoup4==4.5.1
 feedparser==5.2.1
-Flask==0.10.1
-Flask-Babel==0.9
+Flask==0.11.1
+Flask-Babel==0.11.1
 Flask-Login==0.3.2
-Flask-Migrate==1.7.0
+Flask-Migrate==2.0.0
 Flask-Principal==0.4.0
 Flask-RESTful==0.3.5
 Flask-Script==2.0.5
 Flask-SQLAlchemy==2.1
 Flask-SSLify==0.1.5
 Flask-WTF==0.12
-lxml==3.5.0
+lxml==3.6.4
 opml==0.5
-python-dateutil==2.4.2
-python-postmark==0.4.7
+python-dateutil==2.5.3
+python-postmark==0.4.9
 rauth==0.7.2
-requests==2.11.0
+requests==2.11.1
 requests-futures==0.9.7
-SQLAlchemy==1.0.11
-WTForms==2.1
+SQLAlchemy==1.0.15
diff --git a/src/bootstrap.py b/src/bootstrap.py
index e91f8f48c..6b0f61f03 100644
--- a/src/bootstrap.py
+++ b/src/bootstrap.py
@@ -4,11 +4,10 @@
 # required imports and code exection for basic functionning
 
 import os
-import json
 import logging
 from urllib.parse import urlparse
 from flask import Flask
-from flask.ext.sqlalchemy import SQLAlchemy
+from flask_sqlalchemy import SQLAlchemy
 from lib.conf_handling import ConfObject
 
@@ -50,6 +49,7 @@ def set_logging(log_path=None, log_level=logging.INFO, modules=(),
 if os.environ.get('JARR_TESTING', False) == 'true':
     application.debug = True
     application.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///:memory:'
+    conf.SQLALCHEMY_DATABASE_URI = 'sqlite:///:memory:'
     application.config['TESTING'] = True
     conf.CRAWLER_NBWORKER = 1
 else:

diff --git a/src/crawler/classic_crawler.py b/src/crawler/classic_crawler.py
index cf88b11f9..569401ea6 100644
--- a/src/crawler/classic_crawler.py
+++ b/src/crawler/classic_crawler.py
@@ -104,7 +104,6 @@ async def insert_database(user, feed):
                 is_updated = True
             if existing_article.content != content:
                 existing_article.content = content
-                existing_article.readed = False
                 is_updated = True
             if is_updated:
                 art_contr.update({'entry_id': existing_article.entry_id},

diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py
index 13dee8eb5..437000960 100644
--- a/src/crawler/http_crawler.py
+++ b/src/crawler/http_crawler.py
@@ -23,7 +23,7 @@
 from concurrent.futures import wait, ThreadPoolExecutor
 from requests_futures.sessions import FuturesSession
 from lib.utils import default_handler, to_hash
-from web.lib.feed_utils import construct_feed_from
+from web.lib.feed_utils import construct_feed_from, is_parsing_ok
 from crawler.lib.article_utils import extract_id, construct_article
 
 logger = logging.getLogger(__name__)
@@ -119,21 +119,24 @@ def callback(self, response):
                      self.headers.get('etag', ''),
                      self.headers.get('last-modified', ''))
 
-        up_feed = {'error_count': 0, 'last_error': None,
-                   'etag': self.headers.get('etag', ''),
+        up_feed = {'etag': self.headers.get('etag', ''),
                    'last_modified': self.headers.get('last-modified',
                                     strftime('%a, %d %b %Y %X %Z', gmtime()))}
+
+        if not is_parsing_ok(self.parsed_feed):
+            up_feed['last_error'] = str(self.parsed_feed['bozo_exception'])
+            up_feed['error_count'] = self.feed['error_count'] + 1
+            return self.query_jarr('put', 'feed/%d' % self.feed['id'], up_feed)
+
         fresh_feed = construct_feed_from(url=self.feed['link'],
                                          fp_parsed=self.parsed_feed)
         if fresh_feed.get('description'):
             fresh_feed['description'] \
                     = html.unescape(fresh_feed['description'])
 
-        for key in ('description', 'site_link', 'icon_url'):
+        for key in 'description', 'site_link', 'icon_url':
             if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
                 up_feed[key] = fresh_feed[key]
-        if not self.feed.get('title'):
-            up_feed['title'] = html.unescape(fresh_feed.get('title', ''))
         up_feed['user_id'] = self.feed['user_id']
         # re-getting that feed earlier since new entries appeared
         if article_created:
@@ -222,7 +225,7 @@ def callback(self, response):
                 self.feed['id'], self.feed['title'], len(ids), ids)
         future = self.query_jarr('get', 'articles/challenge', {'ids': ids})
         updater = JarrUpdater(self.feed, entries, response.headers,
-                               parsed_response, self.auth)
+                              parsed_response, self.auth)
         future.add_done_callback(updater.callback)
diff --git a/src/crawler/lib/article_utils.py b/src/crawler/lib/article_utils.py
index ea1353fd4..d906d4eb5 100644
--- a/src/crawler/lib/article_utils.py
+++ b/src/crawler/lib/article_utils.py
@@ -54,7 +54,6 @@ def construct_article(entry, feed):
             'user_id': feed['user_id'],
             'entry_id': extract_id(entry).get('entry_id', None),
             'link': link, 'content': content, 'title': title,
-            'readed': False, 'like': False,
             'retrieved_date': now, 'date': date or now}

diff --git a/src/lib/utils.py b/src/lib/utils.py
index 90b06c592..187f7e636 100644
--- a/src/lib/utils.py
+++ b/src/lib/utils.py
@@ -16,7 +16,7 @@ def default_handler(obj, role='admin'):
         return obj.isoformat()
     if hasattr(obj, 'dump'):
         return obj.dump(role=role)
-    if isinstance(obj, (set, frozenset, types.GeneratorType)):
+    if isinstance(obj, (set, frozenset, filter, types.GeneratorType)):
         return list(obj)
     if isinstance(obj, BaseException):
         return str(obj)

diff --git a/src/manager.py b/src/manager.py
index 99df12f03..bb6c96da2 100755
--- a/src/manager.py
+++ b/src/manager.py
@@ -2,8 +2,8 @@
 # -*- coding: utf-8 -*-
 import logging
 from datetime import datetime, timedelta
-from flask.ext.script import Manager
-from flask.ext.migrate import Migrate, MigrateCommand
+from flask_script import Manager
+from flask_migrate import Migrate, MigrateCommand
 
 from bootstrap import application, db, conf
 import web.models
@@ -68,7 +68,7 @@ def fetch_asyncio(user_id, feed_id):
     import asyncio
 
     with application.app_context():
-        from flask.ext.login import current_user
+        from flask_login import current_user
         from crawler import classic_crawler
         ucontr = UserController()
         users = []

diff --git a/src/migrations/versions/9e3fecc9d031_oauth_linuxfr.py b/src/migrations/versions/9e3fecc9d031_oauth_linuxfr.py
index d9758bcf9..178d7de68 100644
--- a/src/migrations/versions/9e3fecc9d031_oauth_linuxfr.py
+++ b/src/migrations/versions/9e3fecc9d031_oauth_linuxfr.py
@@ -1,4 +1,4 @@
-"""empty message
+"""adding linuxfr as an oauth provider
 
 Revision ID: 9e3fecc9d031
 Revises: 122ac0c356c
diff --git a/src/migrations/versions/a7f62d50d366_clustering.py b/src/migrations/versions/a7f62d50d366_clustering.py
new file mode 100644
index 000000000..44d38f7e3
--- /dev/null
+++ b/src/migrations/versions/a7f62d50d366_clustering.py
@@ -0,0 +1,141 @@
+"""adding tables to support clustering
+
+Revision ID: a7f62d50d366
+Revises: 9e3fecc9d031
+Create Date: 2016-07-31 19:45:19.889247
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = 'a7f62d50d366'
+down_revision = '9e3fecc9d031'
+branch_labels = None
+depends_on = None
+
+from datetime import datetime
+from bootstrap import conf
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade():
+    op.create_table('cluster',
+            sa.Column('id', sa.Integer(), nullable=False),
+            sa.Column('cluster_type', sa.String(), nullable=True),
+            sa.Column('main_link', sa.String(), nullable=True),
+            sa.Column('read', sa.Boolean(), nullable=True),
+            sa.Column('liked', sa.Boolean(), nullable=True),
+            sa.Column('created_date', sa.DateTime(), nullable=True),
+            sa.Column('main_date', sa.DateTime(), nullable=True),
+            sa.Column('main_feed_title', sa.String(), nullable=True),
+            sa.Column('main_title', sa.String(), nullable=True),
+            sa.Column('main_article_id', sa.Integer(), nullable=True),
+            sa.Column('user_id', sa.Integer(), nullable=True),
+            sa.ForeignKeyConstraint(['user_id'], ['user.id']),
+            sa.PrimaryKeyConstraint('id'))
+    op.create_table('cluster_as_category',
+            sa.Column('cluster_id', sa.Integer(), nullable=True),
+            sa.Column('category_id', sa.Integer(), nullable=True),
+            sa.ForeignKeyConstraint(['category_id'], ['category.id']),
+            sa.ForeignKeyConstraint(['cluster_id'], ['cluster.id']))
+    op.create_table('cluster_as_feed',
+            sa.Column('cluster_id', sa.Integer(), nullable=True),
+            sa.Column('feed_id', sa.Integer(), nullable=True),
+            sa.ForeignKeyConstraint(['cluster_id'], ['cluster.id']),
+            sa.ForeignKeyConstraint(['feed_id'], ['feed.id']))
+    op.add_column('category',
+            sa.Column('cluster_on_title', sa.Boolean(), default=False))
+    op.add_column('article',
+            sa.Column('cluster_id', sa.Integer(), nullable=True))
+
+    from web.models import Cluster, Feed, Article
+    from web.models.relationships import cluster_as_feed, cluster_as_category
+    if 'sqlite' not in conf.SQLALCHEMY_DATABASE_URI:
+        op.create_foreign_key(None, 'article', 'cluster',
+                              ['cluster_id'], ['id'])
+        op.create_foreign_key(None, 'cluster', 'article',
+                              ['main_article_id'], ['id'])
+
+        op.execute('CREATE INDEX cluster_uid_date ON cluster '
+                   '(user_id, main_date DESC NULLS LAST);')
+        op.execute('CREATE INDEX cluster_liked_uid_date ON cluster '
+                   '(liked, user_id, main_date DESC NULLS LAST);')
+        op.execute('CREATE INDEX cluster_read_uid_date ON cluster '
+                   '(read, user_id, main_date DESC NULLS LAST);')
+
+        print('%s - Creating clusters' % datetime.now().isoformat())
+        op.execute("""
+    INSERT INTO cluster (main_link, user_id, main_date, read, liked)
+    SELECT link, user_id, MIN(date), BOOL_AND(readed), BOOL_OR("like")
+      FROM article GROUP BY link, user_id;""")
+
+        print('%s - Updating clusters with main article infos'
+              % datetime.now().isoformat())
+        op.execute(sa.update(Cluster)
+                .where(sa.and_(Cluster.main_link == Article.link,
+                               Cluster.user_id == Article.user_id,
+                               Article.feed_id == Feed.id,
+                               Cluster.main_date == Article.date))
+                .values(main_title=Article.title,
+                        main_feed_title=Feed.title,
+                        main_article_id=Article.id))
+        op.execute("""UPDATE article SET cluster_id = cluster.id
+                FROM cluster WHERE cluster.main_link = article.link
+                    AND article.user_id = cluster.user_id;""")
+
+    else:
+        print('%s - Creating clusters' % datetime.now().isoformat())
+        op.execute("""
+    INSERT INTO cluster (main_link, user_id, main_date, read, liked)
+    SELECT link, user_id, MIN(date), SUM(readed) = COUNT(id),
+           SUM("like") > COUNT(id)
+      FROM article GROUP BY link, user_id;""")
+
+        WHERE = """WHERE cluster.main_link = article.link
+                AND cluster.user_id = article.user_id
+                AND cluster.main_date = article.date"""
+
+        print('%s - Updating clusters with main article infos'
+              % datetime.now().isoformat())
+        op.execute("UPDATE cluster SET "
+            "main_title = (SELECT article.title FROM article %(WHERE)s), "
+            "main_article_id = (SELECT article.id FROM article %(WHERE)s), "
+            "main_feed_title = (SELECT feed.title FROM article, feed %(WHERE)s "
+            "AND article.feed_id = feed.id);" % {'WHERE': WHERE})
+
+        print('%s - Updating articles' % datetime.now().isoformat())
+        op.execute("""UPDATE article
+                SET cluster_id = (SELECT cluster.id FROM cluster
+                        WHERE cluster.main_link = article.link
+                            AND article.user_id = cluster.user_id);
+        """)
+
+    print('%s - feeding cluster_as_feed' % datetime.now().isoformat())
+    op.execute("""
+INSERT INTO cluster_as_feed (cluster_id, feed_id)
+SELECT article.cluster_id, article.feed_id
+  FROM article GROUP BY article.cluster_id, article.feed_id;
+""")
+
+    print('%s - feeding cluster_as_category' % datetime.now().isoformat())
+    op.execute("""
+INSERT INTO cluster_as_category (cluster_id, category_id)
+SELECT article.cluster_id, article.category_id
+  FROM article GROUP BY article.cluster_id, article.category_id;
+""")
+
+    with op.batch_alter_table('article') as batch_op:
+        batch_op.drop_column('readed')
+        batch_op.drop_column('like')
+
+
+def downgrade():
+    op.add_column('article',
+            sa.Column('readed', sa.Boolean(), default=False),
+            sa.Column('like', sa.Boolean(), default=False))
+    op.drop_constraint(None, 'article', type_='foreignkey')
+    with op.batch_alter_table('article') as batch_op:
+        batch_op.drop_column('cluster_id')
+    op.drop_table('cluster_as_feed')
+    op.drop_table('cluster_as_category')
+    op.drop_table('cluster')
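The migration's two branches approximate boolean aggregates with SUM/COUNT
where BOOL_AND/BOOL_OR aren't available: on booleans, BOOL_AND(x) is
SUM(x) = COUNT(*) and BOOL_OR(x) is SUM(x) > 0. A tiny Python sanity check of
those identities (sample rows made up):

    articles = [{'readed': True, 'like': False},
                {'readed': True, 'like': True}]
    read = sum(a['readed'] for a in articles) == len(articles)  # BOOL_AND
    liked = sum(a['like'] for a in articles) > 0                # BOOL_OR
    assert read and liked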
diff --git a/src/runserver.py b/src/runserver.py
index 67c015e59..04f53e389 100755
--- a/src/runserver.py
+++ b/src/runserver.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 import calendar
 from flask import request
-from flask.ext.babel import Babel
+from flask_babel import Babel
 from bootstrap import conf, application
 
 if conf.ON_HEROKU:
@@ -32,7 +32,7 @@ def get_timezone():
 with application.app_context():
     from web import views
     application.register_blueprint(views.articles_bp)
-    application.register_blueprint(views.article_bp)
+    application.register_blueprint(views.cluster_bp)
     application.register_blueprint(views.feeds_bp)
     application.register_blueprint(views.feed_bp)
     application.register_blueprint(views.categories_bp)

diff --git a/src/scripts/probes.py b/src/scripts/probes.py
index 988c6af17..c6624ad26 100644
--- a/src/scripts/probes.py
+++ b/src/scripts/probes.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python3
 import sys
 from datetime import datetime, timedelta
-from flask.ext.script import Command, Option
+from flask_script import Command, Option
 
 from web.controllers import FeedController, ArticleController
 from web.models import User

diff --git a/src/tests/api/article_test.py b/src/tests/api/article_test.py
index 2d3e6ada3..49dab0fc7 100644
--- a/src/tests/api/article_test.py
+++ b/src/tests/api/article_test.py
@@ -30,36 +30,7 @@ def test_api_list(self):
 
         resp = self._api('get', self.urns, data={'limit': 200}, user='admin')
         self.assertEquals(200, resp.status_code)
-        self.assertEquals(18, len(resp.json()))
-
-    def test_api_update_many(self):
-        resp = self._api('put', self.urns, user='user1',
-                         data=[[1, {'like': True}],
-                               [2, {'readed': True}]])
-        self.assertEquals(['ok', 'ok'], resp.json())
-        self.assertEquals(200, resp.status_code)
-        resp = self._api('get', self.urn, 1, user='user1')
-        self.assertEquals(200, resp.status_code)
-        self.assertTrue(resp.json()['like'])
-
-        resp = self._api('get', self.urn, 2, user='user1')
-        self.assertEquals(200, resp.status_code)
-        self.assertTrue(resp.json()['readed'])
-
-        resp = self._api('put', self.urns, user='user1',
-                         data=[[1, {'like': False}],
-                               [15, {'readed': True}]])
-        self.assertEquals(206, resp.status_code)
-        self.assertEquals(['ok', 'nok'], resp.json())
-
-        resp = self._api('put', self.urns, user='user1',
-                         data=[[16, {'readed': True}],
-                               [17, {'readed': True}]])
-        self.assertEquals(500, resp.status_code)
-        self.assertEquals(['nok', 'nok'], resp.json())
-
-        resp = self._api('get', self.urn, 17, user='user1')
-        self.assertEquals(404, resp.status_code)
+        self.assertEquals(36, len(resp.json()))
 
     def test_article_challenge_method(self):
         articles = self._api('get', self.urns, user='user1').json()
@@ -76,11 +47,11 @@ def test_article_challenge_method(self):
         # user2 doesn't know user1 article, will consider them as knew
         resp = self._api('get', 'articles/challenge', user='user2',
                 data={'ids': [{'id': art['id']} for art in articles]})
-        self.assertEquals(9, len(resp.json()))
+        self.assertEquals(10, len(resp.json()))
         # fake ids won't be recognised either and considered as new
         resp = self._api('get', 'articles/challenge', user='user2',
                 data={'ids': [{'entry_id': art['id']} for art in articles]})
-        self.assertEquals(9, len(resp.json()))
+        self.assertEquals(10, len(resp.json()))
 
     def test_api_creation(self):
         resp = self._api('post', self.urn, user='user1', data={'feed_id': 1})
diff --git a/src/tests/api/category_test.py b/src/tests/api/category_test.py
index b07f4741d..0f7967c1c 100644
--- a/src/tests/api/category_test.py
+++ b/src/tests/api/category_test.py
@@ -10,7 +10,7 @@ def test_api_list(self):
         resp = self._api('get', self.urns, data={'order_by': '-id'},
                          user='user1')
         self.assertEquals(200, resp.status_code)
-        self.assertEquals(2, len(resp.json()))
+        self.assertEquals(4, len(resp.json()))
         self.assertTrue(resp.json()[0]['id'] > resp.json()[-1]['id'])
 
         resp = self._api('get', self.urns, data={'limit': 1}, user='user1')
@@ -19,11 +19,11 @@ def test_api_list(self):
 
         resp = self._api('get', self.urns, user='admin')
         self.assertEquals(200, resp.status_code)
-        self.assertEquals(4, len(resp.json()))
+        self.assertEquals(8, len(resp.json()))
 
         resp = self._api('get', self.urns, data={'limit': 200}, user='admin')
         self.assertEquals(200, resp.status_code)
-        self.assertEquals(4, len(resp.json()))
+        self.assertEquals(8, len(resp.json()))
 
     def test_api_update_many(self):
         resp = self._api('put', self.urns, user='user1',

diff --git a/src/tests/api/cluster_test.py b/src/tests/api/cluster_test.py
new file mode 100644
index 000000000..084a66473
--- /dev/null
+++ b/src/tests/api/cluster_test.py
@@ -0,0 +1,60 @@
+from tests.base import JarrFlaskCommon
+from tests.api.common import ApiCommon
+
+
+class ClusterApiTest(JarrFlaskCommon, ApiCommon):
+    urn = 'cluster'
+    urns = 'clusters'
+
+    def test_api_list(self):
+        resp = self._api('get', self.urns,
+                         data={'order_by': '-id'},
+                         user='user1')
+        self.assertEquals(200, resp.status_code)
+        self.assertEquals(9, len(resp.json()))
+        self.assertTrue(resp.json()[0]['id'] > resp.json()[-1]['id'])
+
+        resp = self._api('get', self.urns, user='user1')
+        self.assertEquals(200, resp.status_code)
+        self.assertEquals(9, len(resp.json()))
+
+        resp = self._api('get', self.urns, data={'limit': 1}, user='user1')
+        self.assertEquals(200, resp.status_code)
+        self.assertEquals(1, len(resp.json()))
+
+        resp = self._api('get', self.urns, user='admin')
+        self.assertEquals(200, resp.status_code)
+        self.assertEquals(10, len(resp.json()))
+
+        resp = self._api('get', self.urns, data={'limit': 200}, user='admin')
+        self.assertEquals(200, resp.status_code)
+        self.assertEquals(18, len(resp.json()))
+
+    def test_api_update_many(self):
+        resp = self._api('put', self.urns, user='user1',
+                         data=[[1, {'liked': True}],
+                               [2, {'read': True}]])
+        self.assertEquals(['ok', 'ok'], resp.json())
+        self.assertEquals(200, resp.status_code)
+        resp = self._api('get', self.urn, 1, user='user1')
+        self.assertEquals(200, resp.status_code)
+        self.assertTrue(resp.json()['liked'])
+
+        resp = self._api('get', self.urn, 2, user='user1')
+        self.assertEquals(200, resp.status_code)
+        self.assertTrue(resp.json()['read'])
+
+        resp = self._api('put', self.urns, user='user1',
+                         data=[[1, {'liked': False}],
+                               [15, {'read': True}]])
+        self.assertEquals(206, resp.status_code)
+        self.assertEquals(['ok', 'nok'], resp.json())
+
+        resp = self._api('put', self.urns, user='user1',
+                         data=[[16, {'read': True}],
+                               [17, {'read': True}]])
+        self.assertEquals(500, resp.status_code)
+        self.assertEquals(['nok', 'nok'], resp.json())
+
+        resp = self._api('get', self.urn, 17, user='user1')
+        self.assertEquals(404, resp.status_code)
diff --git a/src/tests/api/feed_test.py b/src/tests/api/feed_test.py
index e20dc1b53..c6d403275 100644
--- a/src/tests/api/feed_test.py
+++ b/src/tests/api/feed_test.py
@@ -11,7 +11,7 @@ def test_api_list(self):
         resp = self._api('get', self.urns, data={'order_by': '-id'},
                          user='user1')
         self.assertEquals(200, resp.status_code)
-        self.assertEquals(3, len(resp.json()))
+        self.assertEquals(6, len(resp.json()))
         self.assertTrue(resp.json()[0]['id'] > resp.json()[-1]['id'])
 
         resp = self._api('get', self.urns,
@@ -25,11 +25,11 @@ def test_api_list(self):
 
         resp = self._api('get', self.urns, user='admin')
         self.assertEquals(200, resp.status_code)
-        self.assertEquals(6, len(resp.json()))
+        self.assertEquals(10, len(resp.json()))
 
         resp = self._api('get', self.urns, data={'limit': 200}, user='admin')
         self.assertEquals(200, resp.status_code)
-        self.assertEquals(6, len(resp.json()))
+        self.assertEquals(12, len(resp.json()))
 
     def test_api_update_many(self):
         resp = self._api('put', self.urns, user='user1',
@@ -73,22 +73,24 @@ def test_feed_list_fetchable(self):
         self.assertEquals(403, resp.status_code)
         UserController().update({'login__in': ['admin', 'user1']},
                                 {'is_api': True})
-        resp = self._api('get', 'feeds/fetchable', user='user1')
-        self.assertEquals(3, len(resp.json()))
+        resp = self._api('get', 'feeds/fetchable', user='user1',
+                         data={'limit': 100})
+        self.assertEquals(6, len(resp.json()))
         self.assertEquals(200, resp.status_code)
         resp = self._api('get', 'feeds/fetchable', user='user1')
         self.assertEquals(204, resp.status_code)
-        resp = self._api('get', 'feeds/fetchable', user='admin')
-        self.assertEquals(3, len(resp.json()))
+        resp = self._api('get', 'feeds/fetchable', user='admin',
+                         data={'limit': 100})
+        self.assertEquals(6, len(resp.json()))
         self.assertEquals(200, resp.status_code)
         resp = self._api('get', 'feeds/fetchable', user='admin')
         self.assertEquals(204, resp.status_code)
         resp = self._api('get', 'feeds/fetchable', user='user1',
                          data={'refresh_rate': 0})
-        self.assertEquals(3, len(resp.json()))
+        self.assertEquals(5, len(resp.json()))
         resp = self._api('get', 'feeds/fetchable', user='admin',
                          data={'refresh_rate': 0})
         self.assertEquals(5, len(resp.json()))

diff --git a/src/tests/base.py b/src/tests/base.py
index 75a04e5ba..65b87e444 100644
--- a/src/tests/base.py
+++ b/src/tests/base.py
@@ -8,7 +8,7 @@
 from base64 import b64encode
 from runserver import application
 from tests.fixtures.filler import populate_db, reset_db
-from flask.ext.login import login_user, logout_user
+from flask_login import login_user, logout_user
 from werkzeug.exceptions import NotFound
 
 from bootstrap import conf
diff --git a/src/tests/controllers/article_test.py b/src/tests/controllers/article_test.py
index a36b96916..17f330808 100644
--- a/src/tests/controllers/article_test.py
+++ b/src/tests/controllers/article_test.py
@@ -1,5 +1,6 @@
 from tests.base import BaseJarrTest
-from web.controllers import UserController, ArticleController, FeedController
+from web.controllers import (UserController, ArticleController, FeedController,
+                             ClusterController)
 
 
 class ArticleControllerTest(BaseJarrTest):
@@ -10,97 +11,97 @@ def test_article_rights(self):
         self._test_controller_rights(article,
                 UserController().get(id=article['user_id']))
 
-    def test_article_get_unread(self):
-        self.assertEquals({1: 3, 2: 3, 3: 3},
-                ArticleController(2).count_by_feed(readed=False))
-        self.assertEquals({4: 3, 5: 3, 6: 3},
-                ArticleController(3).count_by_feed(readed=False))
-
     def test_create_using_filters(self):
         feed_ctr = FeedController(2)
         feed1 = feed_ctr.read()[0].dump()
         feed2 = feed_ctr.read()[1].dump()
         feed3 = feed_ctr.read()[2].dump()
-        feed_ctr.update({'id': feed1['id']},
-                        {'filters': [{"type": "simple match",
-                                      "pattern": "no see pattern",
-                                      "action on": "match",
-                                      "action": "mark as read"}]})
         feed_ctr.update({'id': feed3['id']},
                         {'filters': [{"type": "regex",
                                       "pattern": ".*(pattern1|pattern2).*",
                                       "action on": "no match",
                                       "action": "mark as favorite"},
                                      {"type": "simple match",
-                                      "pattern": "no see pattern",
+                                      "pattern": "pattern3",
                                       "action on": "match",
                                       "action": "mark as read"}]})
+        feed_ctr.update({'id': feed1['id']},
+                        {'filters': [{"type": "simple match",
+                                      "pattern": "pattern3",
+                                      "action on": "match",
+                                      "action": "mark as read"}]})
+
         art1 = ArticleController(2).create(
-                entry_id="thisisnotatest",
+                entry_id="will be read and faved 1",
                 feed_id=feed1['id'],
-                title="garbage no see pattern garbage",
+                title="garbage pattern1 pattern3 garbage",
                 content="doesn't matter",
-                link="doesn't matter either")
+                link="cluster1")
+        self.assertTrue(art1.cluster.read)
+        self.assertFalse(art1.cluster.liked)
+
         art2 = ArticleController(2).create(
-                entry_id="thisisnotatesteither",
+                entry_id="will be ignored 2",
                 feed_id=feed1['id'],
                 title="garbage see pattern garbage",
                 content="doesn't matter2",
-                link="doesn't matter either2")
+                link="is ignored 2")
+        self.assertFalse(art2.cluster.read)
+        self.assertFalse(art2.cluster.liked)
 
         art3 = ArticleController(2).create(
-                entry_id="thisisnotatest",
+                entry_id="will be read 3",
                 user_id=2,
                 feed_id=feed2['id'],
-                title="garbage no see pattern garbage",
+                title="garbage pattern3 garbage",
                 content="doesn't matter",
-                link="doesn't matter either")
+                link="doesn't matter either3")
+        self.assertFalse(art3.cluster.read)
+        self.assertFalse(art3.cluster.liked)
+
         art4 = ArticleController(2).create(
-                entry_id="thisisnotatesteither",
+                entry_id="will be ignored 4",
                 user_id=2,
                 feed_id=feed2['id'],
                 title="garbage see pattern garbage",
                 content="doesn't matter2",
-                link="doesn't matter either2")
+                link="doesn't matter either4")
+        self.assertFalse(art4.cluster.read)
+        self.assertFalse(art4.cluster.liked)
 
         art5 = ArticleController(2).create(
-                entry_id="thisisnotatest",
+                entry_id="will be faved 5",
                 feed_id=feed3['id'],
-                title="garbage pattern1 garbage",
+                title="garbage anti-attern3 garbage",
                 content="doesn't matter",
-                link="doesn't matter either")
+                link="cluster1")
+        self.assertTrue(art5.cluster.read,
+                        "should be read because it clustered")
+        self.assertTrue(art5.cluster.liked)
+
         art6 = ArticleController(2).create(
-                entry_id="thisisnotatesteither",
+                entry_id="will be faved 6",
                 feed_id=feed3['id'],
-                title="garbage pattern2 garbage",
+                title="garbage pattern1 garbage",
                 content="doesn't matter2",
-                link="doesn't matter either2")
+                link="doesn't matter 6")
+        self.assertFalse(art6.cluster.read)
+        self.assertFalse(art6.cluster.liked)
+
         art7 = ArticleController(2).create(
-                entry_id="thisisnotatesteither",
+                entry_id="will be read 7",
                 feed_id=feed3['id'],
-                title="garbage no see pattern3 garbage",
+                title="garbage pattern3 garbage",
                 content="doesn't matter3",
-                link="doesn't matter either3")
+                link="doesn't matter either7")
+        self.assertTrue(art7.cluster.read)
+        self.assertTrue(art7.cluster.liked)
+
         art8 = ArticleController(2).create(
-                entry_id="thisisnotatesteither",
+                entry_id="will be ignored",
                 feed_id=feed3['id'],
                 title="garbage pattern4 garbage",
                 content="doesn't matter4",
-                link="doesn't matter either4")
-
-        self.assertTrue(art1.readed)
-        self.assertFalse(art1.like)
-        self.assertFalse(art2.readed)
-        self.assertFalse(art2.like)
-        self.assertFalse(art3.readed)
-        self.assertFalse(art3.like)
-        self.assertFalse(art4.readed)
-        self.assertFalse(art4.like)
-        self.assertFalse(art5.readed)
-        self.assertFalse(art5.like)
-        self.assertFalse(art6.readed)
-        self.assertFalse(art6.like)
-        self.assertTrue(art7.readed)
-        self.assertTrue(art7.like)
-        self.assertFalse(art8.readed)
-        self.assertTrue(art8.like)
+                link="doesn't matter either8")
+        self.assertFalse(art8.cluster.read)
+        self.assertTrue(art8.cluster.liked)
entry_id="thisisnotatest", + entry_id="will be read 3", user_id=2, feed_id=feed2['id'], - title="garbage no see pattern garbage", + title="garbage pattern3 garbage", content="doesn't matter", - link="doesn't matter either") + link="doesn't matter either3") + self.assertFalse(art3.cluster.read) + self.assertFalse(art3.cluster.liked) + art4 = ArticleController(2).create( - entry_id="thisisnotatesteither", + entry_id="will be ignored 4", user_id=2, feed_id=feed2['id'], title="garbage see pattern garbage", content="doesn't matter2", - link="doesn't matter either2") + link="doesn't matter either4") + self.assertFalse(art4.cluster.read) + self.assertFalse(art4.cluster.liked) art5 = ArticleController(2).create( - entry_id="thisisnotatest", + entry_id="will be faved 5", feed_id=feed3['id'], - title="garbage pattern1 garbage", + title="garbage anti-attern3 garbage", content="doesn't matter", - link="doesn't matter either") + link="cluster1") + self.assertTrue(art5.cluster.read, + "should be read because it clustered") + self.assertTrue(art5.cluster.liked) + art6 = ArticleController(2).create( - entry_id="thisisnotatesteither", + entry_id="will be faved 6", feed_id=feed3['id'], - title="garbage pattern2 garbage", + title="garbage pattern1 garbage", content="doesn't matter2", - link="doesn't matter either2") + link="doesn't matter 6") + self.assertFalse(art6.cluster.read) + self.assertFalse(art6.cluster.liked) + art7 = ArticleController(2).create( - entry_id="thisisnotatesteither", + entry_id="will be read 7", feed_id=feed3['id'], - title="garbage no see pattern3 garbage", + title="garbage pattern3 garbage", content="doesn't matter3", - link="doesn't matter either3") + link="doesn't matter either7") + self.assertTrue(art7.cluster.read) + self.assertTrue(art7.cluster.liked) + art8 = ArticleController(2).create( - entry_id="thisisnotatesteither", + entry_id="will be ignored", feed_id=feed3['id'], title="garbage pattern4 garbage", content="doesn't matter4", - link="doesn't matter either4") - - self.assertTrue(art1.readed) - self.assertFalse(art1.like) - self.assertFalse(art2.readed) - self.assertFalse(art2.like) - self.assertFalse(art3.readed) - self.assertFalse(art3.like) - self.assertFalse(art4.readed) - self.assertFalse(art4.like) - self.assertFalse(art5.readed) - self.assertFalse(art5.like) - self.assertFalse(art6.readed) - self.assertFalse(art6.like) - self.assertTrue(art7.readed) - self.assertTrue(art7.like) - self.assertFalse(art8.readed) - self.assertTrue(art8.like) + link="doesn't matter either8") + self.assertFalse(art8.cluster.read) + self.assertTrue(art8.cluster.liked) diff --git a/src/tests/controllers/cluster_test.py b/src/tests/controllers/cluster_test.py new file mode 100644 index 000000000..2bdd66f0a --- /dev/null +++ b/src/tests/controllers/cluster_test.py @@ -0,0 +1,120 @@ +from random import randint +from datetime import timedelta +from tests.base import BaseJarrTest +from web.controllers import (ArticleController, ClusterController, + FeedController, CategoryController) + + +class ClusterControllerTest(BaseJarrTest): + _contr_cls = ClusterController + + def test_article_get_unread(self): + self.assertEquals({1: 3, 2: 3, 3: 3, 7: 3, 8: 3, 9: 3}, + ClusterController(2).count_by_feed(read=False)) + self.assertEquals({4: 3, 5: 3, 6: 3, 10: 3, 11: 3, 12: 3}, + ClusterController(3).count_by_feed(read=False)) + + def test_adding_to_cluster_by_link(self): + acontr = ArticleController() + ccontr = ClusterController() + cluster = ccontr.get(id=6) + ccontr.update({'id': 6}, {'read': True}) + 
article = cluster.articles[0] + articles_count = len(cluster.articles) + feed = FeedController(cluster.user_id).read( + user_id=article.user_id, + id__ne=article.feed_id).first() + suffix = str(randint(0, 9999)) + acontr.create( + user_id=article.user_id, + feed_id=feed.id, + entry_id=article.entry_id + suffix, + link=article.link, + title=article.title + suffix, + content=article.content + suffix, + date=article.date + timedelta(1), + retrieved_date=article.retrieved_date + timedelta(1), + ) + cluster = ccontr.get(id=6) + self.assertEquals(articles_count + 1, len(cluster.articles)) + self.assertTrue(cluster.read) + + def test_adding_to_cluster_by_title(self): + article = ArticleController().get(category_id=1) + acontr = ArticleController(article.user_id) + ccontr = ClusterController(article.user_id) + cluster = article.cluster + articles_count = len(cluster.articles) + suffix = str(randint(0, 9999)) + feed = FeedController(article.user_id).create(link=suffix, + user_id=article.user_id, category_id=article.category_id, + title=suffix) + + # testing with non activated category + acontr.create( + user_id=article.user_id, + feed_id=feed.id, + entry_id=article.entry_id + suffix, + link=article.link + suffix, + title=article.title, + content=article.content + suffix, + date=article.date, + retrieved_date=article.retrieved_date, + ) + cluster = ccontr.get(id=cluster.id) + self.assertEquals(articles_count, len(cluster.articles)) + + # testing with activated category + CategoryController().update({'id': article.category_id}, + {'cluster_on_title': True}) + acontr.create( + user_id=article.user_id, + feed_id=feed.id, + category_id=article.category_id, + entry_id=article.entry_id + suffix, + link=article.link + suffix + suffix, + title=article.title, + content=article.content + suffix, + date=article.date, + retrieved_date=article.retrieved_date, + ) + cluster = ccontr.get(id=cluster.id) + self.assertEquals(articles_count + 1, len(cluster.articles)) + + def test_no_mixup(self): + acontr = ArticleController() + ccontr = ClusterController() + total_clusters = len(list(ccontr.read())) + total_articles = len(list(acontr.read())) + for cluster in ccontr.read(): + self.assertEquals(2, len(cluster.articles)) + + for article in acontr.read(): + acontr.create( + entry_id=article.entry_id, + feed_id=article.feed_id, + title=article.title, + content=article.content, + link=article.link) + + self.assertEquals(2 * total_articles, len(list(acontr.read()))) + self.assertEquals(total_clusters, len(list(ccontr.read()))) + + for cluster in ccontr.read(): + self.assertEquals(4, len(cluster.articles)) + self.assertEquals(1, + len(set([a.user_id for a in cluster.articles]))) + + main_article = acontr.read().first() + for article in acontr.read(): + acontr.create( + user_id=main_article.user_id, + feed_id=main_article.feed_id, + entry_id=article.entry_id, + title=article.title, + content=article.content, + link=article.link) + + for cluster in ccontr.read(): + self.assertEquals(1, + len(set([a.user_id for a in cluster.articles]))) diff --git a/src/tests/crawler_test.py b/src/tests/crawler_test.py index a83663238..1e6fc09fe 100644 --- a/src/tests/crawler_test.py +++ b/src/tests/crawler_test.py @@ -78,12 +78,12 @@ def _reset_feeds_freshness(self, **kwargs): def test_http_crawler_add_articles(self): scheduler = CrawlerScheduler('admin', 'admin') resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') - self.assertEquals(18, len(resp.json())) + self.assertEquals(36, len(resp.json())) scheduler.run() 
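
The two tests above fix the lookup order used when attaching an article to a cluster: an identical link always wins, and an identical title only counts when the category opted in via cluster_on_title. A toy, in-memory restatement of that order (plain dicts instead of models, lower() standing in for the SQL ilike):

    def find_cluster(article, clusters, cluster_on_title=False):
        # an identical link always clusters, whatever the category says
        for cluster in clusters:
            if cluster['main_link'] == article['link']:
                return cluster
        # an identical title clusters only when the category opted in
        if cluster_on_title:
            for cluster in clusters:
                if any(art['title'].lower() == article['title'].lower()
                       for art in cluster['articles']):
                    return cluster
        return None  # the caller will create a brand new cluster
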
scheduler.wait() resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') - self.assertEquals(143, len(resp.json())) + self.assertEquals(161, len(resp.json())) for art in resp.json(): self.assertFalse('srcset=' in art['content']) @@ -93,30 +93,30 @@ def test_http_crawler_add_articles(self): scheduler.run() scheduler.wait() resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') - self.assertEquals(143, len(resp.json())) + self.assertEquals(161, len(resp.json())) def test_no_add_on_304(self): scheduler = CrawlerScheduler('admin', 'admin') self.resp_status_code = 304 resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') - self.assertEquals(18, len(resp.json())) + self.assertEquals(36, len(resp.json())) scheduler.run() scheduler.wait() resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') - self.assertEquals(18, len(resp.json())) + self.assertEquals(36, len(resp.json())) def test_matching_etag(self): self._reset_feeds_freshness(etag='fake etag') self.resp_headers = {'etag': 'fake etag'} resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') - self.assertEquals(18, len(resp.json())) + self.assertEquals(36, len(resp.json())) scheduler = CrawlerScheduler('admin', 'admin') scheduler.run() scheduler.wait() resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') - self.assertEquals(18, len(resp.json())) + self.assertEquals(36, len(resp.json())) self._reset_feeds_freshness(etag='jarr/fake etag') self.resp_headers = {'etag': 'jarr/fake etag'} @@ -124,7 +124,7 @@ def test_matching_etag(self): scheduler.run() scheduler.wait() resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') - self.assertEquals(18, len(resp.json())) + self.assertEquals(36, len(resp.json())) self._reset_feeds_freshness(etag='jarr/fake etag') self.resp_headers = {'etag': '########################'} @@ -132,4 +132,4 @@ def test_matching_etag(self): scheduler.run() scheduler.wait() resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') - self.assertEquals(143, len(resp.json())) + self.assertEquals(161, len(resp.json())) diff --git a/src/tests/fixtures/filler.py b/src/tests/fixtures/filler.py index bd9152a8a..31533f19e 100644 --- a/src/tests/fixtures/filler.py +++ b/src/tests/fixtures/filler.py @@ -14,23 +14,35 @@ def populate_db(): password=name) for name in ["user1", "user2"]] - article_total = 0 - for user in (user1, user2): - for i in range(3): - cat_id = None - if i: - cat_id = ccontr.create(user_id=user.id, - name="category%d" % i).id - feed = fcontr.create(link="feed%d" % i, user_id=user.id, + def to_name(u, c=None, f=None, a=None): + string = u.login + if c: + string += " cat%s" % c + if f is not None: + string += " feed%s" % f + if a is not None: + string += " art%s" % a + return string + + for _ in range(2): + article_total = 0 + for user in (user1, user2): + for i in range(3): + cat_id = None + if i: + cat_id = ccontr.create(user_id=user.id, + name=to_name(user, i)).id + feed = fcontr.create(link="feed%d" % i, user_id=user.id, category_id=cat_id, - title="%s feed%d" % (user.login, i)) - for j in range(3): - entry = "%s %s article%d" % (user.login, feed.title, j) - article_total += 1 - acontr.create(entry_id=entry, - link='http://test.te/%d' % article_total, - feed_id=feed.id, user_id=user.id, category_id=cat_id, - title=entry, content="content %d" % article_total) + title=to_name(user, i, i)) + for j in range(3): + entry = to_name(user, i, i, j) + article_total += 1 + 
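
The etag tests above exercise the crawler's freshness checks. The sketch below is a simplification for illustration (in the crawler itself the 'jarr/' variant is derived from a hash of the feed content rather than compared verbatim), but it is enough to explain the three scenarios:

    def response_is_cached(feed, response):
        # 304 means the server honoured If-Modified-Since / If-None-Match
        if response.status_code == 304:
            return True
        # an etag identical to the stored one means nothing changed;
        # 'jarr/' etags were forged by the crawler itself and are compared
        # the same way, they are just never sent back to the server
        etag = feed.get('etag') or ''
        return bool(etag) and etag == response.headers.get('etag')
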
acontr.create(entry_id=entry, + link='http://test.te/%d' % article_total, + feed_id=feed.id, user_id=user.id, + category_id=cat_id, title=entry, + content="content %d" % article_total) def reset_db(): db_empty() diff --git a/src/tests/ui_test.py b/src/tests/ui_test.py index 6c6038e21..fe0ffe899 100644 --- a/src/tests/ui_test.py +++ b/src/tests/ui_test.py @@ -19,11 +19,13 @@ def test_middle_panel(self): resp = self.app.get('/middle_panel') self.assertEquals(200, resp.status_code) self.assertEquals(9, - len(json.loads(resp.data.decode('utf8'))['articles'])) + len(json.loads(resp.data.decode('utf8'))['clusters'])) resp = self.app.get('/middle_panel?filter=unread') self.assertEquals(200, resp.status_code) self.assertEquals(9, - len(json.loads(resp.data.decode('utf8'))['articles'])) + len(json.loads(resp.data.decode('utf8'))['clusters'])) + + def test_search(self): resp = self.app.get('/middle_panel?query=test') self.assertEquals(200, resp.status_code) resp = self.app.get('/middle_panel?query=test&search_title=true') @@ -33,27 +35,45 @@ def test_middle_panel(self): resp = self.app.get('/middle_panel?query=test' '&search_title=true&search_content=true') self.assertEquals(200, resp.status_code) - resp = self.app.get('/middle_panel?filed_type=feed&filter_id=1') - self.assertEquals(200, resp.status_code) - resp = self.app.get('/middle_panel?filter_type=category&filter_id=0') + + def test_middle_panel_filtered_on_category(self): + cat_id = 1 + resp = self.app.get( + '/middle_panel?filter_type=category_id&filter_id=%d' % cat_id) self.assertEquals(200, resp.status_code) + clusters = json.loads(resp.data.decode('utf8'))['clusters'] + for cluster in clusters: + self.assertTrue(cat_id in cluster['categories_id'], + "%d not in %r" % (cat_id, cluster['categories_id'])) + self.assertEquals(3, len(clusters)) + + def test_middle_panel_filtered_on_feed(self): + feed_id = 3 + resp = self.app.get( + '/middle_panel?filter_type=feed_id&filter_id=%d' % feed_id) + clusters = json.loads(resp.data.decode('utf8'))['clusters'] + for cluster in clusters: + self.assertTrue(feed_id in cluster['feeds_id'], + "%d not in %r" % (feed_id, cluster['feeds_id'])) + self.assertEquals(3, len(clusters)) + self.assertEquals(200, resp.status_code) # marking all as read - # marking all as read + def test_mark_all_as_read(self): resp = self.app.put('/mark_all_as_read', data='{}', headers={'Content-Type': 'application/json'}) self.assertEquals(200, resp.status_code) resp = self.app.get('/middle_panel?filter=unread') self.assertEquals(200, resp.status_code) self.assertEquals(0, - len(json.loads(resp.data.decode('utf8'))['articles'])) + len(json.loads(resp.data.decode('utf8'))['clusters'])) - def test_getart(self): - resp = self.app.get('/getart/1', + def test_getclu(self): + resp = self.app.get('/getclu/1', headers={'Content-Type': 'application/json'}) self.assertEquals(200, resp.status_code) self.app.get('/logout') self.app.post('/login', data={'login': 'user2', 'password': 'user2'}) - resp = self.app.get('/getart/1', + resp = self.app.get('/getclu/1', headers={'Content-Type': 'application/json'}) self.assertEquals(404, resp.status_code) diff --git a/src/web/controllers/__init__.py b/src/web/controllers/__init__.py index a1b89ea8d..cda7f37bd 100644 --- a/src/web/controllers/__init__.py +++ b/src/web/controllers/__init__.py @@ -3,7 +3,8 @@ from .article import ArticleController from .user import UserController from .icon import IconController +from .cluster import ClusterController __all__ = ['FeedController', 'CategoryController', 
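
The doubled loop in the fixture above is what produces the figures asserted throughout this series: each pass recreates the same entries, so every cluster ends up holding exactly two articles.

    users, feeds_per_user, arts_per_feed, passes = 2, 3, 3, 2
    total_articles = users * feeds_per_user * arts_per_feed * passes
    total_clusters = total_articles // passes  # both passes reuse the links
    assert (total_articles, total_clusters) == (36, 18)
    # hence the 36 articles the admin sees in crawler_test and the
    # 9 clusters (18 / 2 users) per user in ui_test
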
'ArticleController', - 'UserController', 'IconController'] + 'UserController', 'IconController', 'ClusterController'] diff --git a/src/web/controllers/abstract.py b/src/web/controllers/abstract.py index 57194ee13..d4579717e 100644 --- a/src/web/controllers/abstract.py +++ b/src/web/controllers/abstract.py @@ -3,7 +3,7 @@ from bootstrap import db from datetime import datetime from collections import defaultdict -from sqlalchemy import and_, or_, func +from sqlalchemy import and_, or_ from werkzeug.exceptions import Forbidden, NotFound logger = logging.getLogger(__name__) @@ -25,7 +25,8 @@ def __init__(self, user_id=None, ignore_context=False): except TypeError: self.user_id = user_id - def _to_filters(self, **filters): + @classmethod + def _to_filters(cls, **filters): """ Will translate filters to sqlalchemy filter. This method will also apply user_id restriction if available. @@ -37,26 +38,26 @@ def _to_filters(self, **filters): db_filters = set() for key, value in filters.items(): if key == '__or__': - db_filters.add(or_(*[and_(*self._to_filters(**sub_filter)) + db_filters.add(or_(*[and_(*cls._to_filters(**sub_filter)) for sub_filter in value])) elif key.endswith('__gt'): - db_filters.add(getattr(self._db_cls, key[:-4]) > value) + db_filters.add(getattr(cls._db_cls, key[:-4]) > value) elif key.endswith('__lt'): - db_filters.add(getattr(self._db_cls, key[:-4]) < value) + db_filters.add(getattr(cls._db_cls, key[:-4]) < value) elif key.endswith('__ge'): - db_filters.add(getattr(self._db_cls, key[:-4]) >= value) + db_filters.add(getattr(cls._db_cls, key[:-4]) >= value) elif key.endswith('__le'): - db_filters.add(getattr(self._db_cls, key[:-4]) <= value) + db_filters.add(getattr(cls._db_cls, key[:-4]) <= value) elif key.endswith('__ne'): - db_filters.add(getattr(self._db_cls, key[:-4]) != value) + db_filters.add(getattr(cls._db_cls, key[:-4]) != value) elif key.endswith('__in'): - db_filters.add(getattr(self._db_cls, key[:-4]).in_(value)) + db_filters.add(getattr(cls._db_cls, key[:-4]).in_(value)) elif key.endswith('__like'): - db_filters.add(getattr(self._db_cls, key[:-6]).like(value)) + db_filters.add(getattr(cls._db_cls, key[:-6]).like(value)) elif key.endswith('__ilike'): - db_filters.add(getattr(self._db_cls, key[:-7]).ilike(value)) + db_filters.add(getattr(cls._db_cls, key[:-7]).ilike(value)) else: - db_filters.add(getattr(self._db_cls, key) == value) + db_filters.add(getattr(cls._db_cls, key) == value) return db_filters def _get(self, **filters): @@ -98,10 +99,12 @@ def create(self, **attrs): def read(self, **filters): return self._get(**filters) - def update(self, filters, attrs): + def update(self, filters, attrs, return_objs=False): assert attrs, "attributes to update must not be empty" result = self._get(**filters).update(attrs, synchronize_session=False) db.session.commit() + if return_objs: + return self._get(**filters) return result def delete(self, obj_id): @@ -117,26 +120,30 @@ def _has_right_on(self, obj): return self.user_id is None \ or getattr(obj, self._user_id_key, None) == self.user_id - def _count_by(self, elem_to_group_by, filters): - if self.user_id: - filters['user_id'] = self.user_id - return dict(db.session.query(elem_to_group_by, func.count('id')) - .filter(*self._to_filters(**filters)) - .group_by(elem_to_group_by).all()) + @classmethod + def _extra_columns(cls, role, right=None): + return {} @classmethod def _get_attrs_desc(cls, role, right=None): result = defaultdict(dict) if role == 'admin': - columns = cls._db_cls.__table__.columns.keys() + columns = 
set(cls._db_cls.__table__.columns.keys())\ + .union(cls._db_cls.fields_base_read())\ + .union(cls._db_cls.fields_base_write())\ + .union(cls._db_cls.fields_api_read())\ + .union(cls._db_cls.fields_api_write()) else: assert role in {'base', 'api'}, 'unknown role %r' % role assert right in {'read', 'write'}, \ "right must be 'read' or 'write' with role %r" % role columns = getattr(cls._db_cls, 'fields_%s_%s' % (role, right))() for column in columns: + try: + db_col = getattr(cls._db_cls, column).property.columns[0] + except AttributeError: + continue result[column] = {} - db_col = getattr(cls._db_cls, column).property.columns[0] try: result[column]['type'] = db_col.type.python_type except NotImplementedError: @@ -149,4 +156,5 @@ def _get_attrs_desc(cls, role, right=None): result[column]['type'] = lambda x: dateutil.parser.parse(x) elif db_col.default: result[column]['default'] = db_col.default.arg + result.update(cls._extra_columns(role, right)) return result diff --git a/src/web/controllers/article.py b/src/web/controllers/article.py index d1c04b31d..61beee1d2 100644 --- a/src/web/controllers/article.py +++ b/src/web/controllers/article.py @@ -23,21 +23,24 @@ def challenge(self, ids): continue yield id_ - def count_by_category(self, **filters): - return self._count_by(Article.category_id, filters) - def count_by_feed(self, **filters): - return self._count_by(Article.feed_id, filters) + if self.user_id: + filters['user_id'] = self.user_id + return dict(db.session.query(Article.feed_id, func.count('id')) + .filter(*self._to_filters(**filters)) + .group_by(Article.feed_id).all()) def count_by_user_id(self, **filters): last_conn_max = datetime.utcnow() - timedelta(days=30) return dict(db.session.query(Article.user_id, func.count(Article.id)) .filter(*self._to_filters(**filters)) - .join(User).filter(User.is_active == True, + .join(User).filter(User.is_active.__eq__(True), User.last_connection >= last_conn_max) .group_by(Article.user_id).all()) def create(self, **attrs): + from web.controllers.cluster import ClusterController + cluster_contr = ClusterController(self.user_id) # handling special denorm for article rights assert 'feed_id' in attrs, "must provide feed_id when creating article" feed = FeedController( @@ -48,6 +51,7 @@ def create(self, **attrs): attrs['user_id'], attrs['category_id'] = feed.user_id, feed.category_id # handling feed's filters + cluster_read, cluster_liked = None, False for filter_ in feed.filters or []: match = False if filter_.get('type') == 'regex': @@ -61,17 +65,19 @@ def create(self, **attrs): continue if filter_.get('action') == 'mark as read': - attrs['readed'] = True + cluster_read = True logger.warn("article %s will be created as read", attrs['link']) elif filter_.get('action') == 'mark as favorite': - attrs['like'] = True + cluster_liked = True logger.warn("article %s will be created as liked", attrs['link']) - return super().create(**attrs) + article = super().create(**attrs) + cluster_contr.clusterize(article, cluster_read, cluster_liked) + return article - def update(self, filters, attrs): + def update(self, filters, attrs, *args, **kwargs): user_id = attrs.get('user_id', self.user_id) if 'feed_id' in attrs: feed = FeedController().get(id=attrs['feed_id']) @@ -82,7 +88,7 @@ def update(self, filters, attrs): cat = CategoryController().get(id=attrs['category_id']) assert self.user_id is None or cat.user_id == user_id, \ "no right on cat %r" % cat.id - return super().update(filters, attrs) + return super().update(filters, attrs, *args, **kwargs) def 
get_history(self, year=None, month=None):
         "Sort articles by year and month."
@@ -101,8 +107,3 @@ def get_history(self, year=None, month=None):
             else:
                 articles_counter[article.date.year] += 1
         return articles_counter, articles
-
-    def read_light(self, **filters):
-        return super().read(**filters).with_entities(Article.id, Article.title,
-            Article.readed, Article.like, Article.feed_id, Article.date,
-            Article.category_id).order_by(Article.date.desc())
diff --git a/src/web/controllers/category.py b/src/web/controllers/category.py
index fef5ca81b..01ec4e3c4 100644
--- a/src/web/controllers/category.py
+++ b/src/web/controllers/category.py
@@ -5,8 +5,3 @@
 
 class CategoryController(AbstractController):
     _db_cls = Category
-
-    def delete(self, obj_id):
-        FeedController(self.user_id).update({'category_id': obj_id},
-                {'category_id': None})
-        return super().delete(obj_id)
diff --git a/src/web/controllers/cluster.py b/src/web/controllers/cluster.py
new file mode 100644
index 000000000..d96013a08
--- /dev/null
+++ b/src/web/controllers/cluster.py
@@ -0,0 +1,183 @@
+import logging
+from bootstrap import conf, db
+
+from sqlalchemy import func, Integer, or_, and_
+from sqlalchemy.sql import select, exists
+from sqlalchemy.dialects.postgres import ARRAY
+from werkzeug.exceptions import NotFound
+from .abstract import AbstractController
+from web.models import Cluster, Article
+from web.models.relationships import cluster_as_feed, cluster_as_category
+from web.controllers.article import ArticleController
+
+logger = logging.getLogger(__name__)
+
+
+class ClusterController(AbstractController):
+    _db_cls = Cluster
+
+    def _get_cluster_by_link(self, article):
+        return self.read(user_id=article.user_id,
+                         main_link=article.link).first()
+
+    def _get_cluster_by_title(self, article):
+        if article.category and article.category.cluster_on_title:
+            try:
+                article = ArticleController(self.user_id).get(
+                        user_id=article.user_id,
+                        category_id=article.category_id,
+                        title__ilike=article.title)
+            except NotFound:
+                return
+            return article.cluster
+
+    def _create_from_article(self, article,
+                             cluster_read=None, cluster_liked=False):
+        cluster = Cluster()
+        cluster.user_id = article.user_id
+        cluster.main_link = article.link
+        cluster.main_date = article.date
+        cluster.main_feed_title = article.feed.title
+        cluster.main_title = article.title
+        cluster.main_article_id = article.id
+        cluster.read = bool(cluster_read)
+        cluster.liked = cluster_liked
+        self._enrich_cluster(cluster, article, cluster_read, cluster_liked)
+
+    def _enrich_cluster(self, cluster, article,
+                        cluster_read=None, cluster_liked=False):
+        article.cluster = cluster
+        # a cluster is read only if every article in it was marked as read
+        if cluster_read is not None:
+            cluster.read = cluster.read and cluster_read
+        # once one article is liked the cluster is liked
+        cluster.liked = cluster.liked or cluster_liked
+        if cluster.main_date > article.date:
+            cluster.main_title = article.title
+            cluster.main_date = article.date
+            cluster.main_feed_title = article.feed.title
+            cluster.main_article_id = article.id
+        cluster.feeds.append(article.feed)
+        if article.category_id:
+            cluster.categories.append(article.category)
+        db.session.add(cluster)
+        db.session.add(article)
+        db.session.commit()
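
The flag handling in _create_from_article and _enrich_cluster above can be restated on its own; in this sketch (a plain dict stands in for the model) cluster_read is the tri-state request coming from the feed filters, None meaning no opinion:

    def merge_flags(cluster, cluster_read=None, cluster_liked=False):
        if cluster_read is not None:
            # the cluster stays read only while every article asked for read
            cluster['read'] = cluster['read'] and cluster_read
        # one liked article is enough to like the whole cluster
        cluster['liked'] = cluster['liked'] or cluster_liked
        return cluster

    assert merge_flags({'read': True, 'liked': False},
                       cluster_read=False) == {'read': False, 'liked': False}
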
+
+    def clusterize(self, article, cluster_read=None, cluster_liked=False):
+        """Add the given article to a fitting cluster, or create a new
+        cluster around it."""
+        cluster = self._get_cluster_by_link(article)
+        if not cluster:
+            cluster = self._get_cluster_by_title(article)
+        if cluster:
+            return self._enrich_cluster(cluster, article,
+                                        cluster_read, cluster_liked)
+        return self._create_from_article(article, cluster_read, cluster_liked)
+
+    def join_read(self, feed_id=None, category_id=None, **filters):
+        art_filters = {}
+        if self.user_id:
+            filters['user_id'] = self.user_id
+
+        for key in {'__or__', 'title__ilike', 'content__ilike'}\
+                .intersection(filters):
+            art_filters[key] = filters.pop(key)
+
+        if art_filters:
+            art_contr = ArticleController(self.user_id)
+            filters['id__in'] = {line[0] for line in art_contr
+                    .read(**art_filters).with_entities(Article.cluster_id)}
+
+            if not filters['id__in']:
+                return
+
+        caf_cols = cluster_as_feed.columns
+        cac_cols = cluster_as_category.columns
+        fields = {key: getattr(Cluster, key) for key in ('main_title', 'id',
+                'liked', 'read', 'main_article_id', 'main_feed_title',
+                'main_date', 'main_link')}
+        sqla_fields = list(fields.values())
+        selected_fields = list(fields.values())
+
+        if 'sqlite' in conf.SQLALCHEMY_DATABASE_URI:
+            selected_fields.append(
+                    func.group_concat(caf_cols['feed_id']).label('feeds_id'))
+            if category_id:
+                selected_fields.append(func.group_concat(
+                        cac_cols['category_id']).label('categories_id'))
+        else:
+            selected_fields.append(func.array_agg(caf_cols['feed_id'],
+                    type_=ARRAY(Integer)).label('feeds_id'))
+            if category_id:
+                selected_fields.append(func.array_agg(cac_cols['category_id'],
+                        type_=ARRAY(Integer)).label('categories_id'))
+
+        # description of what's going on below:
+        # base query with the above fields and the aggregations
+        query = db.session.query(*selected_fields)
+
+        # adding the parent filter; we can't simply filter on one id,
+        # because we would miss the cluster's other parents
+        if feed_id:
+            cluster_has_feed = exists(select([caf_cols['feed_id']])
+                    .where(and_(caf_cols['cluster_id'] == Cluster.id,
+                                caf_cols['feed_id'] == feed_id))
+                    .correlate(Cluster))
+            query = query.join(cluster_as_feed,
+                               caf_cols['cluster_id'] == Cluster.id)\
+                         .filter(cluster_has_feed)
+        else:
+            query = query.join(cluster_as_feed,
+                               caf_cols['cluster_id'] == Cluster.id)
+        if category_id:
+            # joining on categories only when filtering on them, to lighten
+            # the query: not every article necessarily has a category
+            cluster_has_category = exists(select([cac_cols['category_id']])
+                    .where(and_(cac_cols['cluster_id'] == Cluster.id,
+                                cac_cols['category_id'] == category_id))
+                    .correlate(Cluster))
+            query = query.join(cluster_as_category,
+                               cac_cols['cluster_id'] == Cluster.id)\
+                         .filter(cluster_has_category)
+
+        # applying the common filters (read / liked) and grouping on all
+        # the plain fields so that aggregation works on distinct ids
+        query = query.group_by(*sqla_fields)\
+                     .filter(*self._to_filters(**filters))
+
+        for clu in query.order_by(Cluster.main_date.desc()).limit(1000):
+            row = {}
+            for key in fields:
+                row[key] = getattr(clu, key)
+            if 'sqlite' in conf.SQLALCHEMY_DATABASE_URI:
+                row['feeds_id'] = set(map(int, clu.feeds_id.split(',')))
+                if category_id and clu.categories_id:
+                    row['categories_id'] = set(
+                            map(int, clu.categories_id.split(',')))
+                elif category_id:
+                    row['categories_id'] = [0]
+            else:
+                row['feeds_id'] = set(clu.feeds_id)
+                if category_id:
+                    row['categories_id'] = set(clu.categories_id)
+            yield row
+
+    @classmethod
+    def _extra_columns(cls, role, right):
+        return {'articles': {'type': list}}
+
+    def count_by_feed(self, **filters):
+        return self._count_by(cluster_as_feed.columns['feed_id'], **filters)
+
+    def count_by_category(self, **filters):
+        return self._count_by(cluster_as_category.columns['category_id'],
+                              **filters)
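
The sqlite / postgres split above only differs in the shape of the aggregated ids: group_concat hands back a comma-separated string where array_agg hands back a real list. A minimal sketch of the corresponding post-processing:

    def agg_to_ids(value, is_sqlite):
        # sqlite's group_concat -> '1,2,2'; postgres' array_agg -> [1, 2, 2]
        if is_sqlite:
            return set(map(int, value.split(',')))
        return set(value)

    assert agg_to_ids('1,2,2', True) == agg_to_ids([1, 2, 2], False) == {1, 2}
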
+
+    def _count_by(self, group_on, **filters):
+        if self.user_id:
+            filters['user_id'] = self.user_id
+        return dict(db.session.query(group_on, func.count('cluster_id'))
+                    .outerjoin(Cluster)
+                    .filter(*self._to_filters(**filters))
+                    .group_by(group_on).all())
diff --git a/src/web/controllers/feed.py b/src/web/controllers/feed.py
index 033bfa977..4dc13357b 100644
--- a/src/web/controllers/feed.py
+++ b/src/web/controllers/feed.py
@@ -48,7 +48,7 @@ def list_late(self, delta, max_error=conf.FEED_ERROR_MAX,
                 __or__=[{'last_retrieved__lt': feed_last_retrieved},
                         {'last_retrieved__lt': min_wait,
                          'id__in': new_art_feed}])
-                .join(User).filter(User.is_active == True,
+                .join(User).filter(User.is_active.__eq__(True),
                         User.last_connection >= last_conn_max)
                 .order_by(Feed.last_retrieved))
         if limit:
@@ -117,11 +117,11 @@ def create(self, **attrs):
         self.__clean_feed_fields(attrs)
         return super().create(**attrs)
 
-    def update(self, filters, attrs):
+    def update(self, filters, attrs, *args, **kwargs):
         self._ensure_icon(attrs)
         self.__clean_feed_fields(attrs)
         if 'category_id' in attrs:
             for feed in self.read(**filters):
                 self.__get_art_contr().update({'feed_id': feed.id},
                         {'category_id': attrs['category_id']})
-        return super().update(filters, attrs)
+        return super().update(filters, attrs, *args, **kwargs)
diff --git a/src/web/controllers/icon.py b/src/web/controllers/icon.py
index 7a4bc9d4d..60b7c2430 100644
--- a/src/web/controllers/icon.py
+++ b/src/web/controllers/icon.py
@@ -24,7 +24,8 @@ def create(self, **attrs):
         return super().create(**self._build_from_url(attrs))
 
-    def update(self, filters, attrs):
-        return super().update(filters, self._build_from_url(attrs))
+    def update(self, filters, attrs, *args, **kwargs):
+        attrs = self._build_from_url(attrs)
+        return super().update(filters, attrs, *args, **kwargs)
diff --git a/src/web/controllers/user.py b/src/web/controllers/user.py
index 54213987f..e0c2caf9a 100644
--- a/src/web/controllers/user.py
+++ b/src/web/controllers/user.py
@@ -23,6 +23,6 @@ def create(self, **attrs):
         self._handle_password(attrs)
         return super().create(**attrs)
 
-    def update(self, filters, attrs):
+    def update(self, filters, attrs, *args, **kwargs):
         self._handle_password(attrs)
-        return super().update(filters, attrs)
+        return super().update(filters, attrs, *args, **kwargs)
diff --git a/src/web/forms.py b/src/web/forms.py
index 34ee57221..1071ea07f 100644
--- a/src/web/forms.py
+++ b/src/web/forms.py
@@ -1,10 +1,10 @@
-from flask.ext.wtf import Form
+from flask_wtf import Form
 from flask import url_for, redirect
-from flask.ext.babel import lazy_gettext
+from flask_babel import lazy_gettext
 from werkzeug.exceptions import NotFound
 from wtforms import TextField, PasswordField, BooleanField, \
     SubmitField, SelectField, validators, HiddenField
-from flask.ext.wtf.html5 import EmailField
+from flask_wtf.html5 import EmailField
 
 from web import utils
 from web.controllers import UserController
diff --git a/src/web/js/actions/MenuActions.js b/src/web/js/actions/MenuActions.js
index c514d3e66..69ceed89e 100644
--- a/src/web/js/actions/MenuActions.js
+++ b/src/web/js/actions/MenuActions.js
@@ -9,7 +9,7 @@ var MenuActions = {
     // PARENT FILTERS
     reload: function(set_filter, setFilterFunc, id) {
         jquery.getJSON('/menu', function(payload) {
-            var old_all_unread_count = MenuStore._datas['all_unread_count'];
+            var old_all_unread_count = MenuStore.all_unread_count;
             JarrDispatcher.dispatch({
                 type: ActionTypes.RELOAD_MENU,
                 feeds: payload.feeds,
diff --git a/src/web/js/actions/MiddlePanelActions.js 
b/src/web/js/actions/MiddlePanelActions.js index efae516a1..cd9107d63 100644 --- a/src/web/js/actions/MiddlePanelActions.js +++ b/src/web/js/actions/MiddlePanelActions.js @@ -23,8 +23,7 @@ var shouldFetch = function(filters) { } var reloadIfNecessaryAndDispatch = function(dispath_payload) { if(shouldFetch(dispath_payload)) { - var filters = MiddlePanelStore.getRequestFilter( - dispath_payload.display_search); + var filters = MiddlePanelStore.getRequestFilter(dispath_payload.display_search); MiddlePanelStore.filter_whitelist.map(function(key) { if(key in dispath_payload) { filters[key] = dispath_payload[key]; @@ -38,7 +37,7 @@ var reloadIfNecessaryAndDispatch = function(dispath_payload) { } jquery.getJSON('/middle_panel', filters, function(payload) { - dispath_payload.articles = payload.articles; + dispath_payload.clusters = payload.clusters; dispath_payload.filters = filters; JarrDispatcher.dispatch(dispath_payload); _last_fetched_with = MiddlePanelStore.getRequestFilter(); @@ -97,38 +96,38 @@ var MiddlePanelActions = { filter: filter, }); }, - changeRead: function(category_id, feed_id, article_id, new_value){ + changeRead: function(cluster_id, new_value){ jquery.ajax({type: 'PUT', contentType: 'application/json', - data: JSON.stringify({readed: new_value}), - url: "api/v2.0/article/" + article_id, - success: function () { + data: JSON.stringify({read: new_value}), + url: "api/v2.0/cluster/" + cluster_id, + success: function (payload) { JarrDispatcher.dispatch({ type: ActionTypes.CHANGE_ATTR, attribute: 'read', value_bool: new_value, value_num: new_value ? -1 : 1, - articles: [{article_id: article_id, - category_id: category_id, - feed_id: feed_id}], + clusters: [{cluster_id: cluster_id, + categories_id: payload.categories_id, + feeds_id: payload.feeds_id}], }); }, }); }, - changeLike: function(category_id, feed_id, article_id, new_value){ + changeLike: function(cluster_id, new_value){ jquery.ajax({type: 'PUT', contentType: 'application/json', - data: JSON.stringify({like: new_value}), - url: "api/v2.0/article/" + article_id, - success: function () { + data: JSON.stringify({liked: new_value}), + url: "api/v2.0/cluster/" + cluster_id, + success: function (payload) { JarrDispatcher.dispatch({ type: ActionTypes.CHANGE_ATTR, attribute: 'liked', value_bool: new_value, value_num: new_value ? 
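
Stripped of the Flux plumbing, the fetch above is a plain GET on /middle_panel whose payload now carries clusters instead of articles. An equivalent call from Python (the root URL is a placeholder and the session is assumed to be already authenticated):

    import requests

    session = requests.Session()  # assumed logged in beforehand

    def unread_cluster_ids(root='http://localhost:5000'):
        payload = session.get(root + '/middle_panel?filter=unread').json()
        return [cluster['id'] for cluster in payload['clusters']]
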
-1 : 1, - articles: [{article_id: article_id, - category_id: category_id, - feed_id: feed_id}], + clusters: [{cluster_id: cluster_id, + categories_id: payload.categories_id, + feeds_id: payload.feeds_id}], }); }, }); @@ -142,7 +141,7 @@ var MiddlePanelActions = { success: function (payload) { JarrDispatcher.dispatch({ type: ActionTypes.MARK_ALL_AS_READ, - articles: payload.articles, + clusters: payload.clusters, }); }, }); diff --git a/src/web/js/actions/RightPanelActions.js b/src/web/js/actions/RightPanelActions.js index 5d78e0013..9f3707825 100644 --- a/src/web/js/actions/RightPanelActions.js +++ b/src/web/js/actions/RightPanelActions.js @@ -4,21 +4,38 @@ var ActionTypes = require('../constants/JarrConstants'); var MenuActions = require('../actions/MenuActions'); var RightPanelActions = { - loadArticle: function(article_id, was_read_before, to_parse) { + loadParent: function(parent_type, parent_id) { + JarrDispatcher.dispatch({ + type: ActionTypes.LOAD_PARENT, + filter_type: parent_type, + filter_id: parent_id + }); + }, + loadCluster: function(cluster_id, was_read_before, to_parse, article_id) { var suffix = ''; if(to_parse) { suffix = '/parse'; + if(article_id) { + suffix += '/' + article_id + } } - jquery.getJSON('/getart/' + article_id + suffix, + jquery.getJSON('/getclu/' + cluster_id + suffix, function(payload) { JarrDispatcher.dispatch({ - type: ActionTypes.LOAD_ARTICLE, - article: payload, + type: ActionTypes.LOAD_CLUSTER, + cluster: payload, was_read_before: was_read_before, + article_id: article_id, }); } ); }, + loadArticle: function(article_id) { + JarrDispatcher.dispatch({ + type: ActionTypes.LOAD_ARTICLE, + article_id: article_id, + }); + }, _apiReq: function(meth, id, obj_type, data, success_callback) { var args = {type: meth, contentType: 'application/json', url: "api/v2.0/" + obj_type + "/" + id} diff --git a/src/web/js/components/MainApp.react.js b/src/web/js/components/MainApp.react.js index ffb145890..7944a00b3 100644 --- a/src/web/js/components/MainApp.react.js +++ b/src/web/js/components/MainApp.react.js @@ -1,6 +1,5 @@ var React = require('react'); -var Col = require('react-bootstrap/lib/Col'); -var Grid = require('react-bootstrap/lib/Grid'); +var Grid = require('react-bootstrap').Grid; var JarrNavBar = require('./Navbar.react'); var Menu = require('./Menu.react'); @@ -14,11 +13,7 @@ var MainApp = React.createClass({ - - - - + diff --git a/src/web/js/components/Menu.react.js b/src/web/js/components/Menu.react.js index 7168743d0..a8fde5f09 100644 --- a/src/web/js/components/Menu.react.js +++ b/src/web/js/components/Menu.react.js @@ -1,9 +1,9 @@ var React = require('react'); -var Col = require('react-bootstrap/lib/Col'); -var Badge = require('react-bootstrap/lib/Badge'); -var Button = require('react-bootstrap/lib/Button'); -var ButtonGroup = require('react-bootstrap/lib/ButtonGroup'); -var Glyphicon = require('react-bootstrap/lib/Glyphicon'); +var Col = require('react-bootstrap').Col; +var Badge = require('react-bootstrap').Badge; +var Button = require('react-bootstrap').Button; +var ButtonGroup = require('react-bootstrap').ButtonGroup; +var Glyphicon = require('react-bootstrap').Glyphicon; var MenuStore = require('../stores/MenuStore'); var MenuActions = require('../actions/MenuActions'); @@ -20,21 +20,25 @@ var FeedItem = React.createClass({ render: function() { var icon = null; var badge_unread = null; + // handling icon, and replacement in case of no-icon if(this.props.icon_url){ icon = (); } else { icon = ; } + // handling unread badge if(this.props.unread){ 
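
Likewise, changeRead and changeLike above are simple PUTs against the new cluster resource. The same calls from Python (credentials and root URL are placeholders):

    import json

    import requests

    def set_cluster_flag(cluster_id, attr, value,
                         root='http://localhost:5000/api/v2.0'):
        # attr is 'read' or 'liked', matching the payloads sent by the UI
        return requests.put('%s/cluster/%d' % (root, cluster_id),
                            auth=('admin', 'admin'),
                            data=json.dumps({attr: value}),
                            headers={'Content-Type': 'application/json'})
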
badge_unread = {this.props.unread}; } + // handling it's the selected feed in the menu var classes = "nav-feed"; if(this.props.active) { classes += " bg-primary"; } - if(this.props.error_count >= MenuStore._datas.max_error) { + // handling error count displaying + if(this.props.error_count >= MenuStore.max_error) { classes += " bg-danger"; - } else if(this.props.error_count > MenuStore._datas.error_threshold) { + } else if(this.props.error_count > MenuStore.error_threshold) { classes += " bg-warning"; } var title = {this.props.title}; @@ -43,6 +47,7 @@ var FeedItem = React.createClass({ ); }, + // filtering on said feed handleClick: function() { MiddlePanelActions.setFeedFilter(this.props.feed_id); }, @@ -54,6 +59,7 @@ var Category = React.createClass({ active_id: React.PropTypes.number}, render: function() { var classes = "nav-cat"; + // handling this category being the selected one in the menu if((this.props.active_type == 'category_id' || this.props.category_id == null) && this.props.active_id == this.props.category_id) { @@ -64,12 +70,14 @@ var Category = React.createClass({ ); }, + // filtering on said category handleClick: function(evnt) { // hack to avoid selection when clicking on folding icon if(!evnt.target.classList.contains('glyphicon')) { if(this.props.category_id != null) { MiddlePanelActions.setCategoryFilter(this.props.category_id); } else { + // handling selecting the "all category" item > removing all filters MiddlePanelActions.removeParentFilter(); } } @@ -95,7 +103,7 @@ var CategoryGroup = React.createClass({ } }, render: function() { - // hidden the no category if empty + // hidden the "no / 0 category" if empty if(!this.props.cat_id && !this.props.feeds.length) { return
    ; } @@ -107,7 +115,7 @@ var CategoryGroup = React.createClass({ var feeds = this.props.feeds.filter(function(feed) { if (filter == 'unread' && feed.unread <= 0) { return false; - } else if (filter == 'error' && feed.error_count <= MenuStore._datas.error_threshold) { + } else if (filter == 'error' && feed.error_count <= MenuStore.error_threshold) { return false; } return true; @@ -125,9 +133,11 @@ var CategoryGroup = React.createClass({ var feeds = []; } var unread = null; + // displaying unread count if(this.props.unread) { unread = {this.props.unread}; } + // folding icon on the right of the category var ctrl = ( ); @@ -142,6 +152,7 @@ var CategoryGroup = React.createClass({
); }, + // handling folding toggleFolding: function(evnt) { this.setState({folded: !this.state.folded}); evnt.stopPropagation(); @@ -241,7 +252,7 @@ var Menu = React.createClass({ var feeds = []; var unread = 0; this.state.categories[cat_id].feeds.map(function(feed_id) { - if(this.state.feeds[feed_id].error_count > MenuStore._datas.error_threshold) { + if(this.state.feeds[feed_id].error_count > MenuStore.error_threshold) { feed_in_error = true; } unread += this.state.feeds[feed_id].unread; diff --git a/src/web/js/components/MiddlePanel.react.js b/src/web/js/components/MiddlePanel.react.js index 95003f2f5..60cd473f5 100644 --- a/src/web/js/components/MiddlePanel.react.js +++ b/src/web/js/components/MiddlePanel.react.js @@ -1,10 +1,13 @@ var React = require('react'); -var Row = require('react-bootstrap/lib/Row'); -var Button = require('react-bootstrap/lib/Button'); -var ButtonGroup = require('react-bootstrap/lib/ButtonGroup'); -var Glyphicon = require('react-bootstrap/lib/Glyphicon'); +var Col = require('react-bootstrap').Col; +var Row = require('react-bootstrap').Row; +var Panel = require('react-bootstrap').Panel; +var Button = require('react-bootstrap').Button; +var ButtonGroup = require('react-bootstrap').ButtonGroup; +var Glyphicon = require('react-bootstrap').Glyphicon; +var MenuStore = require('../stores/MenuStore'); var MiddlePanelStore = require('../stores/MiddlePanelStore'); var MiddlePanelActions = require('../actions/MiddlePanelActions'); var RightPanelActions = require('../actions/RightPanelActions'); @@ -12,15 +15,16 @@ var RightPanelActions = require('../actions/RightPanelActions'); var JarrTime = require('./time.react'); var TableLine = React.createClass({ - propTypes: {article_id: React.PropTypes.number.isRequired, + propTypes: {cluster_id: React.PropTypes.number.isRequired, + main_article_id: React.PropTypes.number.isRequired, feed_title: React.PropTypes.string.isRequired, - icon_url: React.PropTypes.string, title: React.PropTypes.string.isRequired, rel_date: React.PropTypes.string.isRequired, date: React.PropTypes.string.isRequired, read: React.PropTypes.bool.isRequired, selected: React.PropTypes.bool.isRequired, liked: React.PropTypes.bool.isRequired, + feeds_id: React.PropTypes.array.isRequired, }, getInitialState: function() { return {read: this.props.read, liked: this.props.liked, @@ -28,16 +32,17 @@ var TableLine = React.createClass({ }, render: function() { var liked = this.state.liked ? 'l' : ''; - var icon = null; - if(this.props.icon_url){ - icon = (); - } else { - icon = ; - } - var title = ( - {icon} {this.props.feed_title} + {this.props.feeds_id.map(function(feed_id) { + var feed = MenuStore.feeds[feed_id]; + if(feed && feed.icon_url) { + return ; + } + return ; + })} + {this.props.feed_title} ); var read = (); @@ -48,10 +53,9 @@ var TableLine = React.createClass({ if(this.props.selected) { clsses += " active"; } - return (
+ return (
{title} - +
{read} {liked} {this.props.title}
); @@ -63,22 +67,20 @@ var TableLine = React.createClass({ }, toogleRead: function(evnt) { this.setState({read: !this.state.read}, function() { - MiddlePanelActions.changeRead(this.props.category_id, - this.props.feed_id, this.props.article_id, this.state.read); + MiddlePanelActions.changeRead(this.props.cluster_id, this.state.read); }.bind(this)); evnt.stopPropagation(); }, toogleLike: function(evnt) { this.setState({liked: !this.state.liked}, function() { - MiddlePanelActions.changeLike(this.props.category_id, - this.props.feed_id, this.props.article_id, this.state.liked); + MiddlePanelActions.changeLike(this.props.cluster_id, this.state.liked); }.bind(this)); evnt.stopPropagation(); }, - loadArticle: function() { + loadCluster: function() { this.setState({selected: true, read: true}, function() { - RightPanelActions.loadArticle( - this.props.article_id, this.props.read); + RightPanelActions.loadCluster( + this.props.cluster_id, this.props.read); }.bind(this)); }, stopPropagation: function(evnt) { @@ -88,9 +90,9 @@ var TableLine = React.createClass({ var MiddlePanelSearchRow = React.createClass({ getInitialState: function() { - return {query: MiddlePanelStore._datas.query, - search_title: MiddlePanelStore._datas.search_title, - search_content: MiddlePanelStore._datas.search_content, + return {query: MiddlePanelStore.query, + search_title: MiddlePanelStore.search_title, + search_content: MiddlePanelStore.search_content, }; }, render: function() { @@ -143,10 +145,49 @@ var MiddlePanelSearchRow = React.createClass({ }, }); +var MiddlePanelParentFilterRow = React.createClass({ + getInitialState: function() { + return {id: MenuStore.active_id, + type: MenuStore.active_type, + }; + }, + render: function() { + var cn; + var img; + var content = "Selected "; + if (this.state.type == 'feed_id') { + var feed = MenuStore.feeds[this.state.id]; + img = ; + content += "Feed: " + feed.title; + } else if (this.state.type == 'category_id') { + content += "Category: " + MenuStore.categories[this.state.id].name; + } else { + cn = "hidden"; + } + return ( + {img} + {content} + ); + }, + showParent: function(evnt) { + RightPanelActions.loadParent(this.state.type, this.state.id); + }, + componentDidMount: function() { + MenuStore.addChangeListener(this._onChange); + }, + componentWillUnmount: function() { + MenuStore.removeChangeListener(this._onChange); + }, + _onChange: function() { + this.setState({id: MenuStore.active_id, + type: MenuStore.active_type}); + }, +}); + var MiddlePanelFilter = React.createClass({ getInitialState: function() { - return {filter: MiddlePanelStore._datas.filter, - display_search: MiddlePanelStore._datas.display_search}; + return {filter: MiddlePanelStore.filter, + display_search: MiddlePanelStore.display_search}; }, render: function() { var search_row = null; @@ -157,18 +198,18 @@ var MiddlePanelFilter = React.createClass({ @@ -219,31 +260,30 @@ var MiddlePanelFilter = React.createClass({ }, }); -var MiddlePanel = React.createClass({ +var ClusterList = React.createClass({ getInitialState: function() { - return {filter: MiddlePanelStore._datas.filter, articles: []}; + return {filter: MiddlePanelStore.filter, clusters: []}; }, render: function() { return (
- {this.state.articles.map(function(article){ - var key = "a" + article.article_id; - if(article.read) {key+="r";} - if(article.liked) {key+="l";} - if(article.selected) {key+="s";} + {this.state.clusters.map(function(cluster){ + var key = "clu" + cluster.id; + if(cluster.read) {key+="r";} + if(cluster.liked) {key+="l";} + if(cluster.selected) {key+="s";} return ();})} + feed_title={cluster.main_feed_title} />);})}
); @@ -256,10 +296,21 @@ var MiddlePanel = React.createClass({ MiddlePanelStore.removeChangeListener(this._onChange); }, _onChange: function() { - this.setState({filter: MiddlePanelStore._datas.filter, - articles: MiddlePanelStore.getArticles()}); + this.setState({filter: MiddlePanelStore.filter, + clusters: MiddlePanelStore.getClusters()}); + }, +}); + +var MiddlePanel = React.createClass({ + render: function() { + return ( + + + + + ); }, }); -module.exports = {MiddlePanel: MiddlePanel, - MiddlePanelFilter: MiddlePanelFilter}; +module.exports = MiddlePanel; diff --git a/src/web/js/components/Navbar.react.js b/src/web/js/components/Navbar.react.js index dcd57bf84..b295e274f 100644 --- a/src/web/js/components/Navbar.react.js +++ b/src/web/js/components/Navbar.react.js @@ -1,20 +1,20 @@ var React = require('react'); -var Glyphicon = require('react-bootstrap/lib/Glyphicon'); -var Nav = require('react-bootstrap/lib/Nav'); -var NavItem = require('react-bootstrap/lib/NavItem'); -var Navbar = require('react-bootstrap/lib/Navbar'); -var NavDropdown = require('react-bootstrap/lib/NavDropdown'); -var MenuItem = require('react-bootstrap/lib/MenuItem'); -var Modal = require('react-bootstrap/lib/Modal'); -var Button = require('react-bootstrap/lib/Button'); -var Input = require('react-bootstrap/lib/Input'); +var Glyphicon = require('react-bootstrap').Glyphicon; +var Nav = require('react-bootstrap').Nav; +var NavItem = require('react-bootstrap').NavItem; +var Navbar = require('react-bootstrap').Navbar; +var NavDropdown = require('react-bootstrap').NavDropdown; +var MenuItem = require('react-bootstrap').MenuItem; +var Modal = require('react-bootstrap').Modal; +var Button = require('react-bootstrap').Button; +var Input = require('react-bootstrap').Input; var MenuStore = require('../stores/MenuStore'); JarrNavBar = React.createClass({ getInitialState: function() { - return {is_admin: MenuStore._datas.is_admin, - crawling_method: MenuStore._datas.crawling_method, + return {is_admin: MenuStore.is_admin, + crawling_method: MenuStore.crawling_method, showModal: false, modalType: null}; }, buttonFetch: function() { diff --git a/src/web/js/components/RightPanel.react.js b/src/web/js/components/RightPanel.react.js index 5dee0fefa..42a526e03 100644 --- a/src/web/js/components/RightPanel.react.js +++ b/src/web/js/components/RightPanel.react.js @@ -1,9 +1,11 @@ var React = require('react'); -var Col = require('react-bootstrap/lib/Col'); -var Glyphicon = require('react-bootstrap/lib/Glyphicon'); -var Button = require('react-bootstrap/lib/Button'); -var ButtonGroup = require('react-bootstrap/lib/ButtonGroup'); -var Modal = require('react-bootstrap/lib/Modal'); +var Col = require('react-bootstrap').Col; +var Nav = require('react-bootstrap').Nav; +var Modal = require('react-bootstrap').Modal; +var Button = require('react-bootstrap').Button; +var NavItem = require('react-bootstrap').NavItem; +var Glyphicon = require('react-bootstrap').Glyphicon; +var ButtonGroup = require('react-bootstrap').ButtonGroup; var RightPanelActions = require('../actions/RightPanelActions'); var RightPanelStore = require('../stores/RightPanelStore'); @@ -201,7 +203,8 @@ var Article = React.createClass({ reloadParsed: function() { if(this.props.obj.readability_available && !this.props.obj.readability_parsed) { - RightPanelActions.loadArticle(this.props.obj.id, true, true); + RightPanelActions.loadCluster(this.props.obj.cluster_id, + true, true, this.props.obj.id); } }, }); @@ -276,7 +279,10 @@ var Feed = React.createClass({ rows.push(
Filters
); for(var i in this.state.obj.filters) { rows.push(
- When {this.state.obj.filters[i]['action on']} on "{this.state.obj.filters[i].pattern}" ({this.state.obj.filters[i].type}) => {this.state.obj.filters[i].action}
+ When {this.state.obj.filters[i]['action on']}
+ on "{this.state.obj.filters[i].pattern}"
+ ({this.state.obj.filters[i].type})
+ => {this.state.obj.filters[i].action}
); } return
{rows}
; @@ -285,12 +291,12 @@ var Feed = React.createClass({ var content = null; if(this.state.edit_mode) { var categ_options = []; - for(var index in MenuStore._datas.categories_order) { - var cat_id = MenuStore._datas.categories_order[index]; + for(var index in MenuStore.categories_order) { + var cat_id = MenuStore.categories_order[index]; categ_options.push( ); } content = (; } else if (this.state.modalType == 'addCategory') { heading = 'Add a new category'; - action = '/category/create'; body = ; } return ( -
+ {heading} @@ -61,8 +61,20 @@ JarrNavBar = React.createClass({
); }, + handleModalChange: function(evnt) { + this.setState({modalValue: evnt.target.value}); + }, + submit: function(evnt) { + if(this.state.modalType == 'addCategory') { + MenuActions.addCategory(this.state.modalValue); + } else { + MenuActions.addFeed(this.state.modalValue); + } + evnt.preventDefault() + this.close(); + }, close: function() { - this.setState({showModal: false, modalType: null}); + this.setState({showModal: false, modalType: null, modalValue: null}); }, openAddFeed: function() { this.setState({showModal: true, modalType: 'addFeed'}); diff --git a/src/web/views/api/common.py b/src/web/views/api/common.py index 0a005b201..4e0cfc8df 100644 --- a/src/web/views/api/common.py +++ b/src/web/views/api/common.py @@ -114,7 +114,10 @@ class PyAggResourceNew(PyAggAbstractResource): @api_permission.require(http_exception=403) def post(self): """Create a single new object""" - return self.controller.create(**self.reqparse_args(right='write')), 201 + attrs = self.reqparse_args(right='write') + if not attrs.get('user_id'): + attrs['user_id'] = current_user.id + return self.controller.create(**attrs), 201 class PyAggResourceExisting(PyAggAbstractResource): @@ -178,6 +181,8 @@ class Proxy: try: Proxy.json = attrs args = self.reqparse_args('write', req=Proxy, default=False) + if not args.get('user_id'): + args['user_id'] = current_user.id obj = self.controller.create(**args) results.append(obj) except Exception as error: From 13254e2b6e45e24b989878bf7a73e0d0d499da2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Sun, 2 Oct 2016 22:26:06 +0200 Subject: [PATCH 012/164] removing dead code (close #50) --- src/crawler/classic_crawler.py | 154 -------------------------- src/manager.py | 35 +----- src/web/controllers/feed.py | 17 --- src/web/js/components/Navbar.react.js | 8 -- src/web/templates/layout.html | 3 - src/web/utils.py | 102 ----------------- src/web/views/feed.py | 17 --- src/web/views/home.py | 18 --- 8 files changed, 2 insertions(+), 352 deletions(-) delete mode 100644 src/crawler/classic_crawler.py diff --git a/src/crawler/classic_crawler.py b/src/crawler/classic_crawler.py deleted file mode 100644 index 569401ea6..000000000 --- a/src/crawler/classic_crawler.py +++ /dev/null @@ -1,154 +0,0 @@ -import ssl -import asyncio -import logging -import feedparser -import dateutil.parser -from bootstrap import conf -from datetime import datetime -from sqlalchemy import or_ - -from bootstrap import db -from web.models import User -from web.controllers import FeedController, ArticleController -from web.lib.feed_utils import construct_feed_from, is_parsing_ok -from crawler.lib.article_utils import construct_article, extract_id, \ - get_article_content - -logger = logging.getLogger(__name__) - -sem = asyncio.Semaphore(5) - -try: - _create_unverified_https_context = ssl._create_unverified_context -except AttributeError: - # Legacy Python that doesn't verify HTTPS certificates by default - pass -else: - # Handle target environment that doesn't support HTTPS verification - ssl._create_default_https_context = _create_unverified_https_context - - -async def get(*args, **kwargs): - # kwargs["connector"] = aiohttp.TCPConnector(verify_ssl=False) - try: - data = feedparser.parse(args[0]) - return data - except Exception as e: - raise e - - -async def parse_feed(user, feed): - """ - Fetch a feed. - Update the feed and return the articles. 
- """ - parsed_feed = None - up_feed = {} - articles = [] - with (await sem): - try: - parsed_feed = await get(feed.link) - except Exception as e: - up_feed['last_error'] = str(e) - up_feed['error_count'] = feed.error_count + 1 - finally: - up_feed['last_retrieved'] = datetime.utcnow() - if parsed_feed is None: - FeedController().update({'id': feed.id}, up_feed) - return - - if not is_parsing_ok(parsed_feed): - up_feed['last_error'] = str(parsed_feed['bozo_exception']) - up_feed['error_count'] = feed.error_count + 1 - FeedController().update({'id': feed.id}, up_feed) - return - if parsed_feed['entries'] != []: - articles = parsed_feed['entries'] - - up_feed['error_count'] = 0 - up_feed['last_error'] = "" - - # Feed informations - construct_feed_from(feed.link, parsed_feed).update(up_feed) - if feed.title and 'title' in up_feed: - # do not override the title set by the user - del up_feed['title'] - FeedController().update({'id': feed.id}, up_feed) - - return articles - - -async def insert_database(user, feed): - - articles = await parse_feed(user, feed) - if None is articles: - return [] - - logger.debug('inserting articles for {}'.format(feed.title)) - - logger.info("Database insertion...") - new_articles = [] - art_contr = ArticleController(user.id) - for article in articles: - existing_article_req = art_contr.read(feed_id=feed.id, - **extract_id(article)) - exist = existing_article_req.count() != 0 - if exist: - existing_article = existing_article_req.first() - is_updated = False - logger.debug("Article %r (%r) already in the database.", - article['title'], article['link']) - content = get_article_content(article) - if existing_article.title != article['title']: - existing_article.title = article['title'] - is_updated = True - if existing_article.content != content: - existing_article.content = content - is_updated = True - if is_updated: - art_contr.update({'entry_id': existing_article.entry_id}, - existing_article.dump()) - continue - article = construct_article(article, feed.dump()) - try: - new_articles.append(art_contr.create(**article)) - logger.info("New article % (%r) added.", - article['title'], article['link']) - except Exception: - logger.exception("Error when inserting article in database:") - continue - return new_articles - - -async def init_process(user, feed): - # Fetch the feed and insert new articles in the database - articles = await insert_database(user, feed) - logger.debug('inserted articles for %s', feed.title) - return articles - - -def retrieve_feed(loop, user, feed_id=None): - """ - Launch the processus. - """ - logger.info("Starting to retrieve feeds.") - - # Get the list of feeds to fetch - user = User.query.filter(User.email == user.email).first() - feeds = [feed for feed in user.feeds if - feed.error_count <= conf.FEED_ERROR_MAX and feed.enabled] - if feed_id is not None: - feeds = [feed for feed in feeds if feed.id == feed_id] - - if feeds == []: - return - - # Launch the process for all the feeds - tasks = [asyncio.ensure_future(init_process(user, feed)) for feed in feeds] - - try: - loop.run_until_complete(asyncio.wait(tasks)) - except Exception: - logger.exception('an error occured') - - logger.info("All articles retrieved. 
End of the processus.") diff --git a/src/manager.py b/src/manager.py index bb6c96da2..5a332983a 100755 --- a/src/manager.py +++ b/src/manager.py @@ -51,8 +51,8 @@ def reset_feeds(): last_conn_max = now - timedelta(days=30) feeds = list(fcontr.read().join(User).filter(User.is_active == True, - User.last_connection >= last_conn_max)\ - .with_entities(fcontr._db_cls.user_id)\ + User.last_connection >= last_conn_max) + .with_entities(fcontr._db_cls.user_id) .distinct()) step = timedelta(seconds=3600 / fcontr.read().count()) @@ -62,37 +62,6 @@ def reset_feeds(): 'last_retrieved': now - i * step}) -@manager.command -def fetch_asyncio(user_id, feed_id): - "Crawl the feeds with asyncio." - import asyncio - - with application.app_context(): - from flask_login import current_user - from crawler import classic_crawler - ucontr = UserController() - users = [] - try: - users = [ucontr.get(user_id)] - except: - users = ucontr.read() - finally: - if users == []: - users = ucontr.read() - - try: - feed_id = int(feed_id) - except: - feed_id = None - - loop = asyncio.get_event_loop() - for user in users: - if user.is_active: - logger.warn("Fetching articles for " + user.login) - classic_crawler.retrieve_feed(loop, current_user, feed_id) - loop.close() - - manager.add_command('probe_articles', ArticleProbe()) manager.add_command('probe_feeds', FeedProbe()) diff --git a/src/web/controllers/feed.py b/src/web/controllers/feed.py index 4dc13357b..364710da9 100644 --- a/src/web/controllers/feed.py +++ b/src/web/controllers/feed.py @@ -64,23 +64,6 @@ def list_fetchable(self, max_error=conf.FEED_ERROR_MAX, {'last_retrieved': now}) return feeds - def get_duplicates(self, feed_id): - """ - Compare a list of documents by pair. - Pairs of duplicates are sorted by "retrieved date". - """ - feed = self.get(id=feed_id) - duplicates = [] - for pair in itertools.combinations(feed.articles, 2): - date1, date2 = pair[0].date, pair[1].date - if clear_string(pair[0].title) == clear_string(pair[1].title) \ - and (date1 - date2) < timedelta(days=1): - if pair[0].retrieved_date < pair[1].retrieved_date: - duplicates.append((pair[0], pair[1])) - else: - duplicates.append((pair[1], pair[0])) - return feed, duplicates - def get_inactives(self, nb_days): today = datetime.utcnow() inactives = [] diff --git a/src/web/js/components/Navbar.react.js b/src/web/js/components/Navbar.react.js index 11c145014..9fec44174 100644 --- a/src/web/js/components/Navbar.react.js +++ b/src/web/js/components/Navbar.react.js @@ -18,13 +18,6 @@ JarrNavBar = React.createClass({ crawling_method: MenuStore.crawling_method, showModal: false, modalType: null, modalValue: null}; }, - buttonFetch: function() { - if(this.state.is_admin && this.state.crawling_method != 'http') { - return ( - Fetch - ); - } - }, sectionAdmin: function() { if(this.state.is_admin) { return ( @@ -93,7 +86,6 @@ JarrNavBar = React.createClass({
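
As a worked example of the staggering arithmetic kept in reset_feeds above: with the step derived from the feed count, the next crawl of all feeds is spread evenly over one hour.

    from datetime import datetime, timedelta

    feed_count = 120  # say
    step = timedelta(seconds=3600 / feed_count)  # one feed every 30 seconds
    now = datetime.utcnow()
    # feed i is stamped last_retrieved = now - i * step, so each becomes
    # fetchable again a little sooner than the previous one
    stamps = [now - i * step for i in range(feed_count)]
    assert stamps[0] - stamps[-1] < timedelta(hours=1)
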
); }, diff --git a/src/web/js/components/Notifications.react.js b/src/web/js/components/Notifications.react.js new file mode 100644 index 000000000..10f188dc6 --- /dev/null +++ b/src/web/js/components/Notifications.react.js @@ -0,0 +1,47 @@ +var React = require('react'); +var NotificationsStore = require('../stores/NotificationsStore'); +var NotificationSystem = require('react-notification-system'); + + +var Notifications = React.createClass({ + _notificationSystem: null, + addNotification: function(notif) { + this._notificationSystem.addNotification({ + message: notif.message, + level: notif.level, + autoDismiss: 30, + onRemove: this.removeNotification, + }); + }, + removeNotification: function(notif) { + for(var idx in NotificationsStore.notifs) { + if(NotificationsStore.notifs[idx].read == false + && NotificationsStore.notifs[idx].level == notif.level + && NotificationsStore.notifs[idx].message == notif.message) { + NotificationsStore.notifs[idx].read = true; + break; + } + + } + }, + render: function() { + return ; + }, + componentDidMount: function() { + this._notificationSystem = this.refs.notificationSystem; + NotificationsStore.addChangeListener(this._onChange); + }, + componentWillUnmount: function() { + NotificationsStore.removeChangeListener(this._onChange); + }, + _onChange: function() { + for(var idx in NotificationsStore.notifs) { + if(!NotificationsStore.notifs[idx].read) { + this.addNotification(NotificationsStore.notifs[idx]); + NotificationsStore.notifs[idx].read = true; + } + } + }, +}); + +module.exports = Notifications; diff --git a/src/web/js/stores/NotificationsStore.js b/src/web/js/stores/NotificationsStore.js new file mode 100644 index 000000000..8f9613745 --- /dev/null +++ b/src/web/js/stores/NotificationsStore.js @@ -0,0 +1,50 @@ +var JarrDispatcher = require('../dispatcher/JarrDispatcher'); +var ActionTypes = require('../constants/JarrConstants'); +var EventEmitter = require('events').EventEmitter; + +var CHANGE_EVENT = 'change_menu'; +var assign = require('object-assign'); + + +var NotificationsStore = assign({}, EventEmitter.prototype, { + notifs: [], + + addNotifications: function(notifications) { + var count = this.notifs.length; + for(var idx in notifications) { + this.notifs.push({ + key: parseInt(idx) + count, + read: false, + level: notifications[idx].level, + message: notifications[idx].message, + }); + } + }, + getNotifications: function() { + this.notifs = this.notifs.filter(function(notif) {return !notif.read;}); + return this.notifs; + }, + emitChange: function(all_folded) { + if (all_folded) { + this.all_folded = all_folded; + } else { + this.all_folded = null; + } + this.emit(CHANGE_EVENT); + }, + addChangeListener: function(callback) { + this.on(CHANGE_EVENT, callback); + }, + removeChangeListener: function(callback) { + this.removeListener(CHANGE_EVENT, callback); + }, +}); + +NotificationsStore.dispatchToken = JarrDispatcher.register(function(action) { + if(action.notifications) { + NotificationsStore.addNotifications(action.notifications); + NotificationsStore.emitChange(); + } +}); + +module.exports = NotificationsStore; diff --git a/src/web/lib/view_utils.py b/src/web/lib/view_utils.py index 676a513c5..642470fce 100644 --- a/src/web/lib/view_utils.py +++ b/src/web/lib/view_utils.py @@ -1,12 +1,14 @@ import pytz from functools import wraps from datetime import datetime -from flask import request, Response, make_response +from flask import request, Response, make_response, get_flashed_messages from flask_babel import get_locale from 
babel.dates import format_datetime, format_timedelta from web.views.common import jsonify from lib.utils import to_hash +ACCEPTED_LEVELS = {'success', 'info', 'warning', 'error'} + def etag_match(func): @wraps(func) @@ -41,7 +43,14 @@ def _iter_on_rows(rows, now, locale): yield row +def get_notifications(): + for msg in get_flashed_messages(with_categories=True): + yield {'level': msg[0] if msg[0] in ACCEPTED_LEVELS else 'info', + 'message': msg[1]} + + @jsonify def clusters_to_json(clusters): return {'clusters': _iter_on_rows(clusters, - datetime.utcnow(), get_locale())} + datetime.utcnow(), get_locale()), + 'notifications': get_notifications()} diff --git a/src/web/models/article.py b/src/web/models/article.py index 3e4e2fed2..c336ce9c3 100644 --- a/src/web/models/article.py +++ b/src/web/models/article.py @@ -17,12 +17,15 @@ class Article(db.Model, RightMixin): retrieved_date = Column(DateTime, default=datetime.utcnow) readability_parsed = Column(Boolean, default=False) - # relationships - user_id = Column(Integer, ForeignKey('user.id')) - feed_id = Column(Integer, ForeignKey('feed.id')) - category_id = Column(Integer, ForeignKey('category.id')) + # foreign keys + user_id = Column(Integer, ForeignKey('user.id', ondelete='CASCADE')) + feed_id = Column(Integer, ForeignKey('feed.id', ondelete='CASCADE')) + category_id = Column(Integer, + ForeignKey('category.id', ondelete='CASCADE')) cluster_id = Column(Integer, ForeignKey('cluster.id')) + # relationships + user = relationship('User', back_populates='articles') cluster = relationship('Cluster', back_populates='articles', foreign_keys=[cluster_id]) category = relationship('Category', back_populates='articles', diff --git a/src/web/models/category.py b/src/web/models/category.py index f8c21883d..c9ab0ade0 100644 --- a/src/web/models/category.py +++ b/src/web/models/category.py @@ -10,13 +10,18 @@ class Category(db.Model, RightMixin): name = Column(String) cluster_on_title = Column(Boolean, default=False) + # foreign keys + user_id = Column(Integer, ForeignKey('user.id', ondelete='CASCADE')) + # relationships - user_id = Column(Integer, ForeignKey('user.id')) - feeds = relationship('Feed', backref='category', + user = relationship('User', back_populates='categories') + feeds = relationship('Feed', back_populates='category', cascade='all,delete-orphan') articles = relationship('Article', back_populates='category', cascade='all,delete-orphan') - clusters = relationship('Article', back_populates='category') + clusters = relationship('Cluster', back_populates='categories', + foreign_keys='[Article.category_id, Article.cluster_id]', + secondary='article') # index idx_category_uid = Index('user_id') diff --git a/src/web/models/cluster.py b/src/web/models/cluster.py index da400cbf0..f531b92e0 100644 --- a/src/web/models/cluster.py +++ b/src/web/models/cluster.py @@ -21,16 +21,22 @@ class Cluster(db.Model, RightMixin): main_title = Column(String) main_link = Column(String, default=None) - # relationship + # foreign keys main_article_id = Column(Integer, ForeignKey('article.id')) - user_id = Column(Integer, ForeignKey('user.id')) + user_id = Column(Integer, ForeignKey('user.id', ondelete='CASCADE')) + + # relationships + user = relationship('User', back_populates='clusters') + main_article = relationship('Article', uselist=False, + foreign_keys=[main_article_id]) articles = relationship('Article', back_populates='cluster', - foreign_keys=[Article.cluster_id], - cascade='all,delete-orphan', - order_by=Article.date.asc()) - feeds = 
relationship('Article', back_populates='cluster', - foreign_keys=[Article.feed_id, Article.cluster_id]) - categories = relationship('Article', back_populates='cluster', + foreign_keys=[Article.cluster_id], + order_by=Article.date.asc()) + feeds = relationship('Feed', back_populates='clusters', + secondary='article', + foreign_keys=[Article.feed_id, Article.cluster_id]) + categories = relationship('Category', back_populates='clusters', + secondary='article', foreign_keys=[Article.cluster_id, Article.category_id]) # index diff --git a/src/web/models/feed.py b/src/web/models/feed.py index 5fcb63438..da47b21a9 100644 --- a/src/web/models/feed.py +++ b/src/web/models/feed.py @@ -29,13 +29,20 @@ class Feed(db.Model, RightMixin): last_error = Column(String, default="") error_count = Column(Integer, default=0) - # relationships + # foreign keys icon_url = Column(String, ForeignKey('icon.url'), default=None) - user_id = Column(Integer, ForeignKey('user.id')) - category_id = Column(Integer, ForeignKey('category.id')) + user_id = Column(Integer, ForeignKey('user.id', ondelete='CASCADE')) + category_id = Column(Integer, + ForeignKey('category.id', ondelete='CASCADE')) + + # relationships + user = relationship('User', back_populates='feeds') + category = relationship('Category', back_populates='feeds') articles = relationship('Article', back_populates='feed', cascade='all,delete-orphan') - clusters = relationship('Article', back_populates='feed') + clusters = relationship('Cluster', back_populates='feeds', + foreign_keys='[Article.feed_id, Article.cluster_id]', + secondary='article') # index idx_feed_uid_cid = Index('user_id', 'category_id') diff --git a/src/web/models/user.py b/src/web/models/user.py index 7348ee418..0d567821c 100644 --- a/src/web/models/user.py +++ b/src/web/models/user.py @@ -6,10 +6,6 @@ from bootstrap import db from web.models.right_mixin import RightMixin -from web.models.category import Category -from web.models.feed import Feed -from web.models.article import Article -from web.models.cluster import Cluster class User(db.Model, UserMixin, RightMixin): @@ -37,18 +33,18 @@ class User(db.Model, UserMixin, RightMixin): linuxfr_identity = Column(String) # relationships - categories = relationship('Category', backref='user', + categories = relationship('Category', back_populates='user', cascade='all, delete-orphan', - foreign_keys=[Category.user_id]) - feeds = relationship('Feed', backref='user', + foreign_keys='[Category.user_id]') + feeds = relationship('Feed', back_populates='user', cascade='all, delete-orphan', - foreign_keys=[Feed.user_id]) - articles = relationship('Article', backref='user', + foreign_keys='[Feed.user_id]') + articles = relationship('Article', back_populates='user', cascade='all, delete-orphan', - foreign_keys=[Article.user_id]) - clusters = relationship('Cluster', backref='user', + foreign_keys='[Article.user_id]') + clusters = relationship('Cluster', back_populates='user', cascade='all, delete-orphan', - foreign_keys=[Cluster.user_id]) + foreign_keys='[Cluster.user_id]') # api whitelists @staticmethod diff --git a/src/web/templates/feed_list.html b/src/web/templates/feed_list.html index b0ea6437f..62072fe35 100644 --- a/src/web/templates/feed_list.html +++ b/src/web/templates/feed_list.html @@ -36,7 +36,6 @@

{{_("No feed")}}

- {% endfor %} diff --git a/src/web/views/admin.py b/src/web/views/admin.py index 1103635b2..baa2b3d4b 100644 --- a/src/web/views/admin.py +++ b/src/web/views/admin.py @@ -50,7 +50,7 @@ def user(user_id=None): unread_counts=clu_contr.count_by_feed(read=False)) else: - flash(gettext('This user does not exist.'), 'warn') + flash(gettext('This user does not exist.'), 'warning') return redirect(redirect_url()) @@ -67,7 +67,7 @@ def toggle_user(user_id=None): {'is_active': not user.is_active}) if not user_changed: - flash(gettext('This user does not exist.'), 'danger') + flash(gettext('This user does not exist.'), 'error') return redirect(url_for('admin.dashboard')) else: diff --git a/src/web/views/feed.py b/src/web/views/feed.py index 050250858..ce68ad057 100644 --- a/src/web/views/feed.py +++ b/src/web/views/feed.py @@ -1,5 +1,4 @@ import logging -import requests.exceptions from werkzeug.exceptions import BadRequest from flask import Blueprint, render_template, flash, \ @@ -31,21 +30,37 @@ def feeds(): @feed_bp.route('/bookmarklet', methods=['GET', 'POST']) @login_required def bookmarklet(): + def check_feeds(link, site_link): + filters = [] + if link: + filters.append({'link': link}) + if link: + filters.append({'site_link': site_link}) + filters = {'__or__': filters} if len(filters) > 1 else filters[0] + feed_exists = feed_contr.read(**filters).first() + if feed_exists: + flash(gettext("Didn't add feed: feed already exists."), + "warning") + return feed_exists + feed_contr = FeedController(current_user.id) url = (request.args if request.method == 'GET' else request.form)\ .get('url', None) + if not url: flash(gettext("Couldn't add feed: url missing."), "error") raise BadRequest("url is missing") - feed_exists = list(feed_contr.read(__or__=[{'link': url}, - {'site_link': url}])) - if feed_exists: - flash(gettext("Didn't add feed: feed already exists."), - "warning") - return redirect(url_for('home', at='f', ai=feed_exists[0].id)) + existing_feed = check_feeds(url, url) + if existing_feed: + return redirect(url_for('home', at='f', ai=existing_feed.id)) feed = construct_feed_from(url) + + existing_feed = check_feeds(feed.get('link'), feed.get('site_link')) + if existing_feed: + return redirect(url_for('home', at='f', ai=existing_feed.id)) + if not feed.get('link'): feed['enabled'] = False flash(gettext("Couldn't find a feed url, you'll need to find a Atom or" @@ -59,9 +74,6 @@ def bookmarklet(): @feeds_bp.route('/inactives', methods=['GET']) @login_required def inactives(): - """ - List of inactive feeds. 
- """ nb_days = int(request.args.get('nb_days', 365)) inactives = FeedController(current_user.id).get_inactives(nb_days) return render_template('inactives.html', diff --git a/src/web/views/home.py b/src/web/views/home.py index 49187fb27..2f2583386 100644 --- a/src/web/views/home.py +++ b/src/web/views/home.py @@ -9,7 +9,7 @@ from bootstrap import conf from web.lib.article_cleaner import clean_urls -from web.lib.view_utils import etag_match, clusters_to_json +from web.lib.view_utils import etag_match, clusters_to_json, get_notifications from web.views.common import jsonify from web.controllers import (UserController, CategoryController, @@ -71,6 +71,7 @@ def get_menu(): 'max_error': conf.FEED_ERROR_MAX, 'error_threshold': conf.FEED_ERROR_THRESHOLD, 'is_admin': current_user.is_admin, + 'notifications': get_notifications(), 'all_unread_count': 0} @@ -144,7 +145,7 @@ def get_cluster(cluster_id, parse=False, article_id=None): new_content = clean_urls(new_content, article['link'], fix_readability=True) except Exception as error: - flash("Readability failed with %r" % error, "danger") + flash("Readability failed with %r" % error, "warning") article['readability_parsed'] = False else: article['readability_parsed'] = True @@ -153,6 +154,7 @@ def get_cluster(cluster_id, parse=False, article_id=None): {'readability_parsed': True, 'content': new_content}) for article in cluster.articles: article['readability_available'] = readability_available + cluster['notifications'] = get_notifications() return cluster diff --git a/src/web/views/user.py b/src/web/views/user.py index b883ab9a4..dae1222ef 100644 --- a/src/web/views/user.py +++ b/src/web/views/user.py @@ -45,7 +45,7 @@ def opml_import(): try: subscriptions = opml.from_string(data.read()) except: - flash(gettext("Couldn't parse file"), 'danger') + flash(gettext("Couldn't parse file"), 'error') return redirect(request.referrer) ccontr = CategoryController(current_user.id) diff --git a/src/web/views/views.py b/src/web/views/views.py index 7588ea998..3ce3e1804 100644 --- a/src/web/views/views.py +++ b/src/web/views/views.py @@ -21,7 +21,7 @@ def authentication_required(error): def authentication_failed(error): if conf.API_ROOT in request.url: return error - flash(gettext('Forbidden.'), 'danger') + flash(gettext('Forbidden.'), 'error') return redirect(url_for('login')) From 8dbfcfe4e51716a540c4b19348b13df6aec60a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Sat, 8 Oct 2016 13:05:49 +0200 Subject: [PATCH 020/164] fixing updating unread count --- src/web/js/actions/MiddlePanelActions.js | 55 +++++++----------------- src/web/js/stores/MenuStore.js | 37 ++++++++++++++-- src/web/js/stores/MiddlePanelStore.js | 2 +- 3 files changed, 50 insertions(+), 44 deletions(-) diff --git a/src/web/js/actions/MiddlePanelActions.js b/src/web/js/actions/MiddlePanelActions.js index cd9107d63..28dffc060 100644 --- a/src/web/js/actions/MiddlePanelActions.js +++ b/src/web/js/actions/MiddlePanelActions.js @@ -3,48 +3,25 @@ var ActionTypes = require('../constants/JarrConstants'); var jquery = require('jquery'); var MiddlePanelStore = require('../stores/MiddlePanelStore'); -var _last_fetched_with = {}; -var shouldFetch = function(filters) { - return true; // FIXME disabling intelligent fetch for now, no caching better that bad one -// if(filters.filter != null // undefined means unchanged -// && (_last_fetched_with.filter != 'all' -// || _last_fetched_with.filter != filters.filter)) { -// return true; -// } -// if(_last_fetched_with.filter_type != 
null) { -// if(_last_fetched_with.filter_type != filters.filter_type) { -// return true; -// } -// if(_last_fetched_with.filter_id != filters.filter_id) { -// return true; -// } -// } -// return false; -} var reloadIfNecessaryAndDispatch = function(dispath_payload) { - if(shouldFetch(dispath_payload)) { - var filters = MiddlePanelStore.getRequestFilter(dispath_payload.display_search); - MiddlePanelStore.filter_whitelist.map(function(key) { - if(key in dispath_payload) { - filters[key] = dispath_payload[key]; - } - if(filters[key] == null) { - delete filters[key]; - } - }); - if('display_search' in filters) { - delete filters['display_search']; + var filters = MiddlePanelStore.getRequestFilter(dispath_payload.display_search); + MiddlePanelStore.filter_whitelist.map(function(key) { + if(key in dispath_payload) { + filters[key] = dispath_payload[key]; } - jquery.getJSON('/middle_panel', filters, - function(payload) { - dispath_payload.clusters = payload.clusters; - dispath_payload.filters = filters; - JarrDispatcher.dispatch(dispath_payload); - _last_fetched_with = MiddlePanelStore.getRequestFilter(); - }); - } else { - JarrDispatcher.dispatch(dispath_payload); + if(filters[key] == null) { + delete filters[key]; + } + }); + if('display_search' in filters) { + delete filters['display_search']; } + jquery.getJSON('/middle_panel', filters, + function(payload) { + dispath_payload.clusters = payload.clusters; + dispath_payload.filters = filters; + JarrDispatcher.dispatch(dispath_payload); + }); } diff --git a/src/web/js/stores/MenuStore.js b/src/web/js/stores/MenuStore.js index bf41b0fe0..4e6eb6412 100644 --- a/src/web/js/stores/MenuStore.js +++ b/src/web/js/stores/MenuStore.js @@ -13,7 +13,7 @@ var MenuStore = assign({}, EventEmitter.prototype, { active_type: null, active_id: null, is_admin: false, - crawling_method: 'classic', + crawling_method: 'http', all_unread_count: -1, max_error: 0, error_threshold: 0, @@ -76,9 +76,39 @@ MenuStore.dispatchToken = JarrDispatcher.register(function(action) { MenuStore.emitChange(); break; case ActionTypes.PARENT_FILTER: - if(MenuStore.setActive(action.filter_type, action.filter_id)) { - MenuStore.emitChange(); + var changed = MenuStore.setActive(action.filter_type, action.filter_id); + if(action.filters && action.clusters && !action.filters.query + && action.filters.filter == 'unread') { + var new_unread = {}; + action.clusters.map(function(cluster) { + cluster.feeds_id.map(function(feed_id) { + if(!(feed_id in new_unread)) { + new_unread[feed_id] = 0; + } + if(!cluster.read) { + new_unread[feed_id] += 1; + } + }); + }); + for(var feed_id in new_unread) { + var cat_id = MenuStore.feeds[feed_id].category_id; + var old_unread = MenuStore.feeds[feed_id].unread; + if(old_unread > new_unread[feed_id]) { + continue; + } + changed = true; + if((MenuStore.active_type == 'feed_id' + && MenuStore.active_id == feed_id) + || (MenuStore.active_type == 'category_id' + && MenuStore.active_id == cat_id) + || (MenuStore.active_type == null)) { + MenuStore.feeds[feed_id].unread = new_unread[feed_id]; + MenuStore.categories[cat_id].unread -= old_unread; + MenuStore.categories[cat_id].unread += new_unread[feed_id]; + } + } } + if(changed) {MenuStore.emitChange();} break; case ActionTypes.MENU_FILTER: if (MenuStore.setFilter(action.filter)) { @@ -109,7 +139,6 @@ MenuStore.dispatchToken = JarrDispatcher.register(function(action) { MenuStore.readCluster(cluster, -1); } }); - MenuStore.emitChange(); break; default: diff --git a/src/web/js/stores/MiddlePanelStore.js 
b/src/web/js/stores/MiddlePanelStore.js index ff8661b77..a3472a35b 100644 --- a/src/web/js/stores/MiddlePanelStore.js +++ b/src/web/js/stores/MiddlePanelStore.js @@ -103,7 +103,7 @@ MiddlePanelStore.dispatchToken = JarrDispatcher.register(function(action) { var val = action.value_bool; action.clusters.map(function(cluster) { for (var i in MiddlePanelStore.clusters) { - if(MiddlePanelStore.clusters[i].id == cluster.id) { + if(MiddlePanelStore.clusters[i].id == cluster.cluster_id) { if (MiddlePanelStore.clusters[i][attr] != val) { MiddlePanelStore.clusters[i][attr] = val; // avoiding redraw if not filter, display won't change anyway From c1d94620c859bd69d3b8198ab5287145204b5f40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Sat, 8 Oct 2016 16:40:35 +0200 Subject: [PATCH 021/164] updating feed title triggers the denorm on cluster --- src/bootstrap.py | 1 + src/tests/controllers/feed_test.py | 22 +++++++++++++ src/web/controllers/article.py | 7 ++-- src/web/controllers/cluster.py | 12 +++---- src/web/controllers/feed.py | 37 ++++++++++++++++++---- src/web/js/actions/MenuActions.js | 3 +- src/web/js/actions/RightPanelActions.js | 31 ++++++++++++++++-- src/web/js/components/MiddlePanel.react.js | 23 ++++++++++---- 8 files changed, 108 insertions(+), 28 deletions(-) diff --git a/src/bootstrap.py b/src/bootstrap.py index eacd89eda..bb2914a0a 100644 --- a/src/bootstrap.py +++ b/src/bootstrap.py @@ -55,6 +55,7 @@ def set_logging(log_path=None, log_level=logging.INFO, modules=(), else: application.debug = conf.LOG_LEVEL <= logging.DEBUG +SQLITE_ENGINE = 'sqlite' in conf.SQLALCHEMY_DATABASE_URI PARSED_PLATFORM_URL = urlparse(conf.PLATFORM_URL) application.config['SERVER_NAME'] = PARSED_PLATFORM_URL.netloc application.config['PREFERRED_URL_SCHEME'] = PARSED_PLATFORM_URL.scheme diff --git a/src/tests/controllers/feed_test.py b/src/tests/controllers/feed_test.py index 01ae24fad..9055e85a9 100644 --- a/src/tests/controllers/feed_test.py +++ b/src/tests/controllers/feed_test.py @@ -19,3 +19,25 @@ def test_feed_rights(self): ArticleController().read(feed_id=feed['id']).count()) self._test_controller_rights(feed, UserController().get(id=feed['user_id'])) + + def test_update_cluster_on_change_title(self): + feed = FeedController(2).read()[0] + for cluster in feed.clusters: + self.assertEquals(feed['title'], cluster['main_feed_title']) + FeedController(2).update({'id': feed.id}, {'title': 'updated title'}) + + feed = FeedController(2).get(id=feed.id) + self.assertEquals('updated title', feed.title) + for cluster in feed.clusters: + self.assertEquals(feed.title, cluster.main_feed_title) + + def test_admin_update_cluster_on_change_title(self): + feed = FeedController(2).read()[0] + for cluster in feed.clusters: + self.assertEquals(feed['title'], cluster['main_feed_title']) + FeedController().update({'id': feed.id}, {'title': 'updated title'}) + + feed = FeedController().get(id=feed.id) + self.assertEquals('updated title', feed.title) + for cluster in feed.clusters: + self.assertEquals(feed.title, cluster.main_feed_title) diff --git a/src/web/controllers/article.py b/src/web/controllers/article.py index 56d037e49..20a6c67a2 100644 --- a/src/web/controllers/article.py +++ b/src/web/controllers/article.py @@ -115,14 +115,11 @@ def remove_from_all_clusters(self, article_id): """ from web.controllers import ClusterController clu_ctrl = ClusterController(self.user_id) - # FIXME : update all articles - article = self.get(id=article_id) - cluster = article.cluster + cluster = 
self.get(id=article_id).cluster if len(cluster.articles) == 1: clu_ctrl.delete(cluster.id) return False - clu_ctrl.update({'id': cluster.id}, - {'main_article_id': cluster.articles[1].id}) + clu_ctrl._enrich_cluster(cluster, cluster.articles[1]) return True def delete(self, obj_id): diff --git a/src/web/controllers/cluster.py b/src/web/controllers/cluster.py index 365bbf128..93e0b7894 100644 --- a/src/web/controllers/cluster.py +++ b/src/web/controllers/cluster.py @@ -1,5 +1,5 @@ import logging -from bootstrap import conf, db +from bootstrap import db, SQLITE_ENGINE from sqlalchemy import func, Integer, and_ from sqlalchemy.orm import aliased @@ -11,7 +11,6 @@ from web.controllers.article import ArticleController logger = logging.getLogger(__name__) -SQLITE_ENGINE = 'sqlite' in conf.SQLALCHEMY_DATABASE_URI class ClusterController(AbstractController): @@ -46,14 +45,15 @@ def _create_from_article(self, article, self._enrich_cluster(cluster, article, cluster_read, cluster_liked) def _enrich_cluster(self, cluster, article, - cluster_read=None, cluster_liked=False): + cluster_read=None, cluster_liked=False, + force_article_as_main=False): article.cluster = cluster # a cluster if cluster_read is not None: cluster.read = cluster.read and cluster_read # once one article is liked the cluster is liked cluster.liked = cluster.liked or cluster_liked - if cluster.main_date > article.date: + if cluster.main_date > article.date or force_article_as_main: cluster.main_title = article.title cluster.main_date = article.date cluster.main_feed_title = article.feed.title @@ -105,7 +105,7 @@ def join_read(self, feed_id=None, **filters): if filter_on_category: selected_fields.append(func.group_concat( art_cat_alias.category_id).label('categories_id')) - else: + else: # pragma: no cover selected_fields.append(func.array_agg(art_feed_alias.feed_id, type_=ARRAY(Integer)).label('feeds_id')) if filter_on_category: @@ -162,7 +162,7 @@ def join_read(self, feed_id=None, **filters): map(int, clu.categories_id.split(','))) elif filter_on_category: row['categories_id'] = [0] - else: + else: # pragma: no cover row['feeds_id'] = set(clu.feeds_id) if filter_on_category: row['categories_id'] = set(clu.categories_id) diff --git a/src/web/controllers/feed.py b/src/web/controllers/feed.py index d44cf7a27..1c975dfd3 100644 --- a/src/web/controllers/feed.py +++ b/src/web/controllers/feed.py @@ -1,10 +1,12 @@ import logging +from sqlalchemy import and_ +from sqlalchemy.sql import select, update from datetime import datetime, timedelta -from bootstrap import conf -from .abstract import AbstractController -from .icon import IconController -from web.models import User, Feed +from bootstrap import db, conf, SQLITE_ENGINE +from web.controllers.abstract import AbstractController +from web.controllers.icon import IconController +from web.models import User, Feed, Article, Cluster logger = logging.getLogger(__name__) DEFAULT_LIMIT = 5 @@ -101,10 +103,31 @@ def create(self, **attrs): def update(self, filters, attrs, *args, **kwargs): self._ensure_icon(attrs) self.__clean_feed_fields(attrs) - if 'category_id' in attrs: + if {'title', 'category_id'}.intersection(attrs): for feed in self.read(**filters): - self.__get_art_contr().update({'feed_id': feed.id}, - {'category_id': attrs['category_id']}, *args, **kwargs) + if 'category_id' in attrs: + self.__get_art_contr().update({'feed_id': feed.id}, + {'category_id': attrs['category_id']}) + if 'title' in attrs: + # sqlite doesn't support join on update, but they're REALLY + # more efficient so 
we'll use them anyway with postgres + if self.user_id: + where_clause = and_(Article.user_id == self.user_id, + Article.feed_id == feed.id) + else: + where_clause = Article.feed_id == feed.id + if SQLITE_ENGINE: + stmt = select([Article.id]).where(where_clause) + stmt = update(Cluster)\ + .where(Cluster.main_article_id.in_(stmt))\ + .values(main_feed_title=attrs['title']) + else: # pragma: no cover + stmt = update(Cluster)\ + .where(and_( + Article.id == Cluster.main_article_id, + where_clause))\ + .values(dict(main_feed_title=attrs['title'])) + db.session.execute(stmt) return super().update(filters, attrs, *args, **kwargs) def delete(self, obj_id): diff --git a/src/web/js/actions/MenuActions.js b/src/web/js/actions/MenuActions.js index 9cb804cb1..755d43d9d 100644 --- a/src/web/js/actions/MenuActions.js +++ b/src/web/js/actions/MenuActions.js @@ -16,7 +16,8 @@ var MenuActions = { * purpose of setting filter and that the setFilterFunc is not * some event passed by react */ - if(set_filter == 'set_filter' && typeof setFilterFunc == 'function' && id) { + if(set_filter == 'set_filter' && typeof setFilterFunc == 'function' + && (id || id == 0)) { setFilterFunc(id); /* old_all_unread_count will be -1 on first iteration, * so it won't be triggered twice */ diff --git a/src/web/js/actions/RightPanelActions.js b/src/web/js/actions/RightPanelActions.js index 92308b707..a78f01105 100644 --- a/src/web/js/actions/RightPanelActions.js +++ b/src/web/js/actions/RightPanelActions.js @@ -2,6 +2,9 @@ var jquery = require('jquery'); var JarrDispatcher = require('../dispatcher/JarrDispatcher'); var ActionTypes = require('../constants/JarrConstants'); var MenuActions = require('../actions/MenuActions'); +var MenuStore = require('../stores/MenuStore'); +var MiddlePanelActions = require('../actions/MiddlePanelActions'); + var RightPanelActions = { loadParent: function(parent_type, parent_id) { @@ -45,10 +48,34 @@ var RightPanelActions = { jquery.ajax(args); }, putObj: function(id, obj_type, fields) { - this._apiReq('PUT', id, obj_type, fields, MenuActions.reload); + function callback() { + MenuActions.reload(); + if('title' in fields && obj_type == 'feed') { + MiddlePanelActions.reload(); + } + } + this._apiReq('PUT', id, obj_type, fields, callback); }, delObj: function(id, obj_type, fields) { - this._apiReq('DELETE', id, obj_type, null, MenuActions.reload); + var future_active_type; + var future_active_id = 0; + if(obj_type == 'feed') { + future_active_type = 'category_id'; + future_active_id = MenuStore.feeds[id].category_id; + } + function callback() { + var cmd; + var reload_callback; + if(obj_type == 'feed') { + cmd = 'set_filter'; + reload_callback = MiddlePanelActions.setCategoryFilter; + } else if (obj_type == 'category') { + cmd = 'set_filter'; + reload_callback = MiddlePanelActions.removeParentFilter; + } + MenuActions.reload(cmd, reload_callback, future_active_id); + } + this._apiReq('DELETE', id, obj_type, null, callback); }, resetErrors: function(feed_id) { this._apiReq('PUT', feed_id, 'feed', {error_count: 0, last_error: ''}, diff --git a/src/web/js/components/MiddlePanel.react.js b/src/web/js/components/MiddlePanel.react.js index 60cd473f5..aa1075b27 100644 --- a/src/web/js/components/MiddlePanel.react.js +++ b/src/web/js/components/MiddlePanel.react.js @@ -32,7 +32,7 @@ var TableLine = React.createClass({ }, render: function() { var liked = this.state.liked ? 
'l' : ''; - var title = ( {this.props.feeds_id.map(function(feed_id) { @@ -149,6 +149,8 @@ var MiddlePanelParentFilterRow = React.createClass({ getInitialState: function() { return {id: MenuStore.active_id, type: MenuStore.active_type, + title: null, + icon_url: null, }; }, render: function() { @@ -156,11 +158,10 @@ var MiddlePanelParentFilterRow = React.createClass({ var img; var content = "Selected "; if (this.state.type == 'feed_id') { - var feed = MenuStore.feeds[this.state.id]; - img = ; - content += "Feed: " + feed.title; + content += "Feed: " + this.state.title; + img = ; } else if (this.state.type == 'category_id') { - content += "Category: " + MenuStore.categories[this.state.id].name; + content += "Category: " + this.state.title; } else { cn = "hidden"; } @@ -179,8 +180,16 @@ var MiddlePanelParentFilterRow = React.createClass({ MenuStore.removeChangeListener(this._onChange); }, _onChange: function() { - this.setState({id: MenuStore.active_id, - type: MenuStore.active_type}); + var new_state = {id: MenuStore.active_id, title: null, + type: MenuStore.active_type, icon_url: null}; + if (new_state.type == 'feed_id' && new_state.id in MenuStore.feeds) { + new_state.title = MenuStore.feeds[new_state.id].title; + new_state.icon_url = MenuStore.feeds[new_state.id].icon_url; + } else if (new_state.type == 'category_id' + && new_state.id in MenuStore.categories) { + new_state.title = MenuStore.categories[new_state.id].name; + } + this.setState(new_state); }, }); From ea2c0d2fd6bc156ad1bb6ec8549e29e0d3c26357 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Sun, 9 Oct 2016 20:04:44 +0200 Subject: [PATCH 022/164] basic tag support (close #54) --- src/crawler/http_crawler.py | 25 ++-- src/lib/article_utils.py | 119 +++++++++++++++--- .../versions/9462d9753423_tag_handling.py | 29 +++++ .../versions/a7f62d50d366_clustering.py | 25 ++-- src/tests/api/article_test.py | 14 ++- src/tests/controllers/article_test.py | 29 +++++ src/tests/crawler_test.py | 18 +++ src/tests/fixtures/filler.py | 6 +- src/web/controllers/abstract.py | 4 + src/web/controllers/article.py | 41 +++--- src/web/controllers/cluster.py | 3 +- src/web/js/components/RightPanel.react.js | 94 +++++++------- src/web/js/stores/MenuStore.js | 6 +- src/web/models/__init__.py | 3 +- src/web/models/article.py | 12 +- src/web/models/right_mixin.py | 6 + src/web/models/tag.py | 18 +++ src/web/views/views.py | 1 - 18 files changed, 334 insertions(+), 119 deletions(-) create mode 100644 src/migrations/versions/9462d9753423_tag_handling.py create mode 100644 src/web/models/tag.py diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py index d89dd3747..2bcc41858 100644 --- a/src/crawler/http_crawler.py +++ b/src/crawler/http_crawler.py @@ -24,7 +24,7 @@ from requests_futures.sessions import FuturesSession from lib.utils import default_handler, to_hash from lib.feed_utils import construct_feed_from, is_parsing_ok -from lib.article_utils import extract_id, construct_article +from lib.article_utils import construct_article, process_filters, FiltersAction logger = logging.getLogger(__name__) logging.captureWarnings(True) @@ -181,9 +181,9 @@ def callback(self, response): 'feed; bumping error count to %r', self.feed['id'], self.feed['title'], error_count) future = self.query_jarr('put', 'feed/%d' % self.feed['id'], - {'error_count': error_count, - 'last_error': str(error), - 'user_id': self.feed['user_id']}) + {'error_count': error_count, + 'last_error': str(error), + 'user_id': self.feed['user_id']}) return if 
response.status_code == 304: @@ -215,13 +215,24 @@ def callback(self, response): self.feed['id'], self.feed['title']) ids, entries = [], {} + feedparser parsed_response = feedparser.parse(response.content) for entry in parsed_response['entries']: - entry_ids = {'entry_id': extract_id(entry), - 'feed_id': self.feed['id'], - 'user_id': self.feed['user_id']} + entry_ids = construct_article(entry, self.feed, + {'entry_id', 'feed_id', 'user_id', 'tags'}) + skipped, _, _ = process_filters(self.feed['filters'], entry_ids, + {FiltersAction.SKIP}) + if skipped: + logger.debug('%r %r - skipping article', + self.feed['id'], self.feed['title']) + continue + del entry_ids['tags'] entries[tuple(sorted(entry_ids.items()))] = entry ids.append(entry_ids) + if not ids: + logger.info('%r %r - all articles skipped, adding nothing', + self.feed['id'], self.feed['title']) + return logger.debug('%r %r - found %d entries %r', self.feed['id'], self.feed['title'], len(ids), ids) future = self.query_jarr('get', 'articles/challenge', {'ids': ids}) diff --git a/src/lib/article_utils.py b/src/lib/article_utils.py index e5c1992ec..29f19b2c2 100644 --- a/src/lib/article_utils.py +++ b/src/lib/article_utils.py @@ -1,5 +1,7 @@ +import re import html import logging +from enum import Enum import dateutil.parser from datetime import datetime, timezone from bs4 import BeautifulSoup, SoupStrainer @@ -17,29 +19,37 @@ def extract_id(entry): return entry.get('entry_id') or entry.get('id') or entry['link'] -def construct_article(entry, feed): +def construct_article(entry, feed, fields=None): "Safe method to transorm a feedparser entry into an article" now = datetime.utcnow() - date = None - for date_key in ('published', 'created', 'date'): - if entry.get(date_key): - try: - date = dateutil.parser.parse(entry[date_key])\ - .astimezone(timezone.utc) - except Exception: - pass - else: - break + article = {} - content = get_article_content(entry) - link, title = get_article_details(entry) - content = clean_urls(content, link) - - return {'feed_id': feed['id'], - 'user_id': feed['user_id'], - 'entry_id': extract_id(entry), - 'link': link, 'content': content, 'title': title, - 'retrieved_date': now, 'date': date or now} + def push_in_article(key, value): + if fields is None or key in fields: + article[key] = value + push_in_article('feed_id', feed['id']) + push_in_article('user_id', feed['user_id']) + push_in_article('entry_id', extract_id(entry)) + push_in_article('retrieved_date', now) + if fields is None or 'date' in fields: + for date_key in ('published', 'created', 'updated'): + if entry.get(date_key): + try: + article['date'] = dateutil.parser.parse(entry[date_key])\ + .astimezone(timezone.utc) + except Exception: + pass + else: + break + push_in_article('content', get_article_content(entry)) + if fields is None or {'link', 'title'}.intersection(fields): + link, title = get_article_details(entry) + push_in_article('link', link) + push_in_article('title', title) + push_in_article('content', clean_urls(article['content'], link)) + push_in_article('tags', [tag.get('term').strip() + for tag in entry.get('tags', [])]) + return article def get_article_content(entry): @@ -71,3 +81,72 @@ def get_article_details(entry): except IndexError: # no title pass return article_link, article_title or 'No title' + + +class FiltersAction(Enum): + READ = 'mark as read' + LIKED = 'mark as favorite' + SKIP = 'skipped' + + +class FiltersType(Enum): + REGEX = 'regex' + MATCH = 'simple match' + EXACT_MATCH = 'exact match' + TAG_MATCH = 'tag match' + 
TAG_CONTAINS = 'tag contains' + + +class FiltersTrigger(Enum): + MATCH = 'match' + NO_MATCH = 'no match' + + +def process_filters(filters, article, only_actions=None): + skipped, read, liked = False, None, False + filters = filters or [] + if only_actions is None: + only_actions = set(FiltersAction) + for filter_ in filters: + match = False + try: + pattern = filter_.get('pattern', '') + filter_type = FiltersType(filter_.get('type')) + filter_action = FiltersAction(filter_.get('action')) + filter_trigger = FiltersTrigger(filter_.get('action on')) + if filter_type is not FiltersType.REGEX: + pattern = pattern.lower() + except ValueError: + continue + if filter_action not in only_actions: + logger.debug('ignoring filter %r' % filter_) + continue + + title = article.get('title').lower() + tags = [tag.lower() for tag in article.get('tags', [])] + if filter_type is FiltersType.REGEX: + match = re.match(pattern, title) + elif filter_type is FiltersType.MATCH: + match = pattern in title + elif filter_type is FiltersType.EXACT_MATCH: + match = pattern == title + elif filter_type is FiltersType.TAG_MATCH: + match = pattern in tags + elif filter_type is FiltersType.TAG_CONTAINS: + match = any(pattern in tag for tag in tags) + take_action = match and filter_trigger is FiltersTrigger.MATCH \ + or not match and filter_trigger is FiltersTrigger.NO_MATCH + + if not take_action: + continue + + if filter_action is FiltersAction.READ: + read = True + elif filter_action is FiltersAction.LIKED: + liked = True + elif filter_action is FiltersAction.SKIP: + skipped = True + + if skipped or read or liked: + logger.info("%r applied on %r", filter_action.value, article['link']) + return skipped, read, liked diff --git a/src/migrations/versions/9462d9753423_tag_handling.py b/src/migrations/versions/9462d9753423_tag_handling.py new file mode 100644 index 000000000..8f5434c5f --- /dev/null +++ b/src/migrations/versions/9462d9753423_tag_handling.py @@ -0,0 +1,29 @@ +"""adding tag handling capacities + +Revision ID: 9462d9753423 +Revises: 835c03754c69 +Create Date: 2016-10-08 23:07:57.425931 + +""" + +# revision identifiers, used by Alembic. 
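+# "revision" names this migration and "down_revision" chains it after
+# 835c03754c69, so alembic applies the two in order. The tag table created
+# in upgrade() below uses a composite primary key (text, article_id) and an
+# ON DELETE CASCADE foreign key: a tag is unique per article and is removed
+# automatically with the article it belongs to.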
+revision = '9462d9753423' +down_revision = '835c03754c69' +branch_labels = None +depends_on = None + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + op.create_table('tag', + sa.Column('text', sa.String(), nullable=False), + sa.Column('article_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['article_id'], ['article.id'], + ondelete='CASCADE'), + sa.PrimaryKeyConstraint('text', 'article_id') + ) + +def downgrade(): + op.drop_table('tag') diff --git a/src/migrations/versions/a7f62d50d366_clustering.py b/src/migrations/versions/a7f62d50d366_clustering.py index 646e1348d..567149dab 100644 --- a/src/migrations/versions/a7f62d50d366_clustering.py +++ b/src/migrations/versions/a7f62d50d366_clustering.py @@ -13,7 +13,7 @@ depends_on = None from datetime import datetime -from bootstrap import conf +from bootstrap import SQLITE_ENGINE from alembic import op import sqlalchemy as sa @@ -39,7 +39,7 @@ def upgrade(): sa.Column('cluster_id', sa.Integer(), nullable=True)) from web.models import Cluster, Feed, Article - if 'sqlite' not in conf.SQLALCHEMY_DATABASE_URI: + if not SQLITE_ENGINE: op.create_foreign_key(None, 'article', 'cluster', ['cluster_id'], ['id']) op.create_foreign_key(None, 'cluster', 'article', @@ -98,26 +98,27 @@ def upgrade(): batch_op.drop_column('readed') batch_op.drop_column('like') - if 'sqlite' not in conf.SQLALCHEMY_DATABASE_URI: - print('%s - creating index 0/5' % datetime.now().isoformat()) - op.execute('CREATE INDEX article_uid_cluid ON article ' - '(user_id, cluster_id);') - print('%s - creating index 1/5' % datetime.now().isoformat()) + if not SQLITE_ENGINE: + print('%s - creating index 0/6' % datetime.now().isoformat()) + op.execute('CREATE INDEX article_cluid ON article (cluster_id);') + print('%s - creating index 1/6' % datetime.now().isoformat()) op.execute('CREATE INDEX article_uid_cid_cluid ON article' '(user_id, category_id, cluster_id);') - print('%s - creating index 2/5' % datetime.now().isoformat()) + print('%s - creating index 2/6' % datetime.now().isoformat()) op.execute('CREATE INDEX article_uid_fid_cluid ON article' '(user_id, feed_id, cluster_id);') - print('%s - creating index 3/5' % datetime.now().isoformat()) + print('%s - creating index 3/6' % datetime.now().isoformat()) op.execute('CREATE INDEX cluster_uid_date ON cluster ' '(user_id, main_date DESC NULLS LAST);') - print('%s - creating index 4/5' % datetime.now().isoformat()) + print('%s - creating index 4/6' % datetime.now().isoformat()) op.execute('CREATE INDEX cluster_liked_uid_date ON cluster ' '(liked, user_id, main_date DESC NULLS LAST);') - print('%s - creating index 5/5' % datetime.now().isoformat()) + print('%s - creating index 5/6' % datetime.now().isoformat()) op.execute('CREATE INDEX cluster_read_uid_date ON cluster ' '(read, user_id, main_date DESC NULLS LAST);') - + print('%s - creating index 6/6' % datetime.now().isoformat()) + op.execute('CREATE INDEX cluster_uid_mlink ON cluster ' + '(user_id, main_link);') def downgrade(): op.add_column('article', diff --git a/src/tests/api/article_test.py b/src/tests/api/article_test.py index 49dab0fc7..e214fe125 100644 --- a/src/tests/api/article_test.py +++ b/src/tests/api/article_test.py @@ -57,16 +57,26 @@ def test_api_creation(self): resp = self._api('post', self.urn, user='user1', data={'feed_id': 1}) self.assertEquals(403, resp.status_code) UserController().update({'login': 'user1'}, {'is_api': True}) - resp = self._api('post', self.urn, user='user1', data={'feed_id': 1}) + + resp = self._api('post', 
self.urn, user='user1', + data={'feed_id': 1, 'tags': ['tag1', 'tag2']}) + content = resp.json() self.assertEquals(201, resp.status_code) - self.assertEquals(2, resp.json()['user_id']) + self.assertEquals(2, content['user_id']) + self.assertEquals(['tag1', 'tag2'], content['tags']) + + resp = self._api('get', "%s/%s" % (self.urn, content['id'])) + self.assertEquals(['tag1', 'tag2'], resp.json()['tags']) + resp = self._api('post', self.urn, user='user1', data={'feed_id': 1}) self.assertEquals(2, resp.json()['user_id']) self.assertEquals(201, resp.status_code) + resp = self._api('post', self.urn, user='user2', data={'user_id': 2, 'feed_id': 1}) self.assertEquals(403, resp.status_code) UserController().update({'login': 'user2'}, {'is_api': True}) + resp = self._api('post', self.urn, user='user2', data={'user_id': 2, 'feed_id': 1}) self.assertEquals(404, resp.status_code) diff --git a/src/tests/controllers/article_test.py b/src/tests/controllers/article_test.py index b32b86435..8a344d005 100644 --- a/src/tests/controllers/article_test.py +++ b/src/tests/controllers/article_test.py @@ -29,6 +29,15 @@ def test_create_using_filters(self): "pattern": "pattern3", "action on": "match", "action": "mark as read"}]}) + feed_ctr.update({'id': feed2['id']}, + {'filters': [{"type": "tag match", + "pattern": "pattern4", + "action on": "match", + "action": "skipped"}, + {"type": "tag contains", + "pattern": "pattern5", + "action on": "match", + "action": "skipped"}]}) art1 = ArticleController(2).create( entry_id="will be read and faved 1", @@ -104,3 +113,23 @@ def test_create_using_filters(self): link="doesn't matter either8") self.assertFalse(art8.cluster.read) self.assertTrue(art8.cluster.liked) + + art9 = ArticleController(2).create( + entry_id="unique9", + feed_id=feed2['id'], + title="garbage", tags=['garbage', 'pattern4'], + content="doesn't matterç", + link="doesn't matter either9") + self.assertIsNone(art9) + self.assertEquals(0, + ArticleController(2).read(entry_id='unique9').count()) + + art10 = ArticleController(2).create( + entry_id="will be ignored", + feed_id=feed2['id'], + title="garbage", tags=['pattern5 garbage', 'garbage'], + content="doesn't matter10", + link="doesn't matter either10") + self.assertIsNone(art10) + self.assertEquals(0, + ArticleController(2).read(entry_id='unique10').count()) diff --git a/src/tests/crawler_test.py b/src/tests/crawler_test.py index d307d7408..71cdd7ed5 100644 --- a/src/tests/crawler_test.py +++ b/src/tests/crawler_test.py @@ -107,6 +107,24 @@ def test_no_add_on_304(self): resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') self.assertEquals(36, len(resp.json())) + @patch('crawler.http_crawler.JarrUpdater.callback') + def test_no_add_feed_skip(self, jarr_updated_callback): + scheduler = CrawlerScheduler('admin', 'admin') + self.resp_status_code = 304 + resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') + self.assertEquals(36, len(resp.json())) + FeedController().update({}, {'filters': [{"type": "tag contains", + "action on": "match", + "pattern": "pattern5", + "action": "skipped"}]}) + + scheduler.run() + scheduler.wait(**self.wait_params) + self.assertFalse(jarr_updated_callback.called, + "all articles should have been skipped") + resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') + self.assertEquals(36, len(resp.json())) + def test_matching_etag(self): self._reset_feeds_freshness(etag='fake etag') self.resp_headers = {'etag': 'fake etag'} diff --git a/src/tests/fixtures/filler.py 
b/src/tests/fixtures/filler.py index 643b7bce1..d6abba09e 100644 --- a/src/tests/fixtures/filler.py +++ b/src/tests/fixtures/filler.py @@ -14,7 +14,7 @@ def populate_db(): password=name) for name in ["user1", "user2"]] - def to_name(u, c=None, f=None, a=None): + def to_name(u, c=None, f=None, a=None, *args): string = u.login if c: string += " cat%s" % c @@ -22,7 +22,7 @@ def to_name(u, c=None, f=None, a=None): string += " feed%s" % f if a is not None: string += " art%s" % a - return string + return string + ''.join(args) for k in range(2): article_total = 0 @@ -41,6 +41,8 @@ def to_name(u, c=None, f=None, a=None): acontr.create(entry_id=entry, link='http://test.te/%d' % article_total, feed_id=feed.id, user_id=user.id, + tags=[to_name(user, i, i, j, '1'), + to_name(user, i, i, j, '2')], category_id=cat_id, title=entry, content="content %d" % article_total) diff --git a/src/web/controllers/abstract.py b/src/web/controllers/abstract.py index db84f3728..7d2bcbdc5 100644 --- a/src/web/controllers/abstract.py +++ b/src/web/controllers/abstract.py @@ -4,6 +4,7 @@ from datetime import datetime from collections import defaultdict from sqlalchemy import and_, or_ +from sqlalchemy.ext.associationproxy import AssociationProxy from werkzeug.exceptions import Forbidden, NotFound logger = logging.getLogger(__name__) @@ -144,6 +145,9 @@ def _get_attrs_desc(cls, role, right=None): "right must be 'read' or 'write' with role %r" % role columns = getattr(cls._db_cls, 'fields_%s_%s' % (role, right))() for column in columns: + if isinstance(getattr(cls._db_cls, column), AssociationProxy): + result[column] = {'type': list, 'default': list} + continue try: db_col = getattr(cls._db_cls, column).property.columns[0] except AttributeError: diff --git a/src/web/controllers/article.py b/src/web/controllers/article.py index 20a6c67a2..c22116efe 100644 --- a/src/web/controllers/article.py +++ b/src/web/controllers/article.py @@ -1,4 +1,3 @@ -import re import logging import sqlalchemy from sqlalchemy import func @@ -9,6 +8,7 @@ from .abstract import AbstractController from web.controllers import CategoryController, FeedController from web.models import User, Article +from lib.article_utils import process_filters logger = logging.getLogger(__name__) @@ -50,31 +50,11 @@ def create(self, **attrs): "no right on feed %r" % feed.id attrs['user_id'], attrs['category_id'] = feed.user_id, feed.category_id - # handling feed's filters - cluster_read, cluster_liked = None, False - for filter_ in feed.filters or []: - match = False - if filter_.get('type') == 'regex': - match = re.match(filter_['pattern'], attrs.get('title', '')) - elif filter_.get('type') == 'simple match': - match = filter_['pattern'] in attrs.get('title', '') - take_action = match and filter_.get('action on') == 'match' \ - or not match and filter_.get('action on') == 'no match' - - if not take_action: - continue - - if filter_.get('action') == 'mark as read': - cluster_read = True - logger.info("article article will be created as read %r", - attrs['link']) - elif filter_.get('action') == 'mark as favorite': - cluster_liked = True - logger.info("article article will be created as liked %r", - attrs['link']) - + skipped, read, liked = process_filters(feed.filters, attrs) + if skipped: + return None article = super().create(**attrs) - cluster_contr.clusterize(article, cluster_read, cluster_liked) + cluster_contr.clusterize(article, read, liked) return article def update(self, filters, attrs, *args, **kwargs): @@ -122,7 +102,14 @@ def remove_from_all_clusters(self, 
article_id): clu_ctrl._enrich_cluster(cluster, cluster.articles[1]) return True - def delete(self, obj_id): + def delete(self, obj_id, commit=True): still_delete_article = self.remove_from_all_clusters(obj_id) if still_delete_article: - return super().delete(obj_id) + obj = self.get(id=obj_id) + for tag in obj.tag_objs: + db.session.delete(tag) + db.session.delete(obj) + if commit: + db.session.flush() + db.session.commit() + return obj diff --git a/src/web/controllers/cluster.py b/src/web/controllers/cluster.py index 93e0b7894..946503337 100644 --- a/src/web/controllers/cluster.py +++ b/src/web/controllers/cluster.py @@ -131,7 +131,8 @@ def join_read(self, feed_id=None, **filters): .filter(cluster_has_feed) else: query = query.join(art_feed_alias, - art_feed_alias.cluster_id == Cluster.id) + and_(art_feed_alias.user_id == self.user_id, + art_feed_alias.cluster_id == Cluster.id)) if filter_on_category: # joining only if filtering on categories to lighten the query # as every article doesn't obligatorily have a category > outerjoin diff --git a/src/web/js/components/RightPanel.react.js b/src/web/js/components/RightPanel.react.js index 42a526e03..f2e1416e1 100644 --- a/src/web/js/components/RightPanel.react.js +++ b/src/web/js/components/RightPanel.react.js @@ -72,47 +72,52 @@ var PanelMixin = { var items = []; var key; if(!this.state.edit_mode) { - this.fields.filter(function(field) { - return field.type != 'ignore'; - }).map(function(field) { - key = this.getKey('dt', field.key); - items.push(
<dt key={key}>{field.title}</dt>
); - key = this.getKey('dd', field.key); - if(field.type == 'string') { - items.push(
<dd key={key}>{this.props.obj[field.key]}</dd>
); - } else if(field.type == 'bool') { - if(this.props.obj[field.key]) { - items.push(
); - } else { - items.push(
); - } - } else if (field.type == 'link') { - items.push(
- - {this.props.obj[field.key]} - -
); - } - }.bind(this)); + this.fields.filter(function(field) { + return field.type != 'ignore'; + }).map(function(field) { + if(field.type == 'list' && this.props.obj[field.key].length == 0) { + return; + } + key = this.getKey('dt', field.key); + items.push(
<dt key={key}>{field.title}</dt>
); + key = this.getKey('dd', field.key); + if(field.type == 'string') { + items.push(
<dd key={key}>{this.props.obj[field.key]}</dd>
); + } else if(field.type == 'list') { + items.push(
<dd key={key}>{this.props.obj[field.key].join(', ')}</dd>
); + } else if(field.type == 'bool') { + if(this.props.obj[field.key]) { + items.push(
); + } else { + items.push(
); + } + } else if (field.type == 'link') { + items.push(
+ + {this.props.obj[field.key]} + +
); + } + }.bind(this)); } else { - this.fields.filter(function(field) { - return field.type != 'ignore'; - }).map(function(field) { - key = this.getKey('dd', field.key); - items.push(
<dt key={key}>{field.title}</dt>
); - key = this.getKey('dt', field.key); - var input = null; - if(field.type == 'string' || field.type == 'link') { - input = (); - } else if (field.type == 'bool') { - input = (); - } - items.push(
<dd key={key}>{input}</dd>
); - }.bind(this)); + this.fields.filter(function(field) { + return field.type != 'ignore'; + }).map(function(field) { + key = this.getKey('dd', field.key); + items.push(
<dt key={key}>{field.title}</dt>
); + key = this.getKey('dt', field.key); + var input = null; + if(field.type == 'string' || field.type == 'link') { + input = (); + } else if (field.type == 'bool') { + input = (); + } + items.push(
<dd key={key}>{input}</dd>
); + }.bind(this)); } return (
<dl className="dl-horizontal">{items}</dl>
); }, @@ -175,6 +180,7 @@ var Article = React.createClass({ isRemovable: function() {return true;}, fields: [{'title': 'Date', 'type': 'string', 'key': 'date'}, {'title': 'Original link', 'type': 'link', 'key': 'link'}, + {'title': 'Tags', 'type': 'list', 'key': 'tags'}, ], obj_type: 'article', getTitle: function() {return this.props.obj.title;}, @@ -247,14 +253,18 @@ var Feed = React.createClass({ ); }, diff --git a/src/web/js/stores/MenuStore.js b/src/web/js/stores/MenuStore.js index 4e6eb6412..3e7095dca 100644 --- a/src/web/js/stores/MenuStore.js +++ b/src/web/js/stores/MenuStore.js @@ -40,9 +40,9 @@ var MenuStore = assign({}, EventEmitter.prototype, { readCluster: function(cluster, value) { cluster.feeds_id.map(function(feed_id) { this.feeds[feed_id].unread += value; - }.bind(this)); - cluster.categories_id.map(function(category_id) { - this.categories[category_id].unread += value; + if(this.feeds[feed_id].category_id) { + this.categories[this.feeds[feed_id].category_id].unread += value; + } }.bind(this)); }, emitChange: function(all_folded) { diff --git a/src/web/models/__init__.py b/src/web/models/__init__.py index 10cb05e21..9962cb730 100644 --- a/src/web/models/__init__.py +++ b/src/web/models/__init__.py @@ -4,8 +4,9 @@ from .icon import Icon from .category import Category from .cluster import Cluster +from .tag import Tag -__all__ = ['Feed', 'User', 'Article', 'Icon', 'Category', 'Cluster'] +__all__ = ['Feed', 'User', 'Article', 'Icon', 'Category', 'Cluster', 'Tag'] def db_empty(db): diff --git a/src/web/models/article.py b/src/web/models/article.py index c336ce9c3..0e28b0068 100644 --- a/src/web/models/article.py +++ b/src/web/models/article.py @@ -2,6 +2,7 @@ from sqlalchemy import (Column, Index, ForeignKey, Integer, String, Boolean, DateTime) from sqlalchemy.orm import relationship +from sqlalchemy.ext.associationproxy import association_proxy from bootstrap import db from web.models.right_mixin import RightMixin @@ -32,6 +33,11 @@ class Article(db.Model, RightMixin): foreign_keys=[category_id]) feed = relationship('Feed', back_populates='articles', foreign_keys=[feed_id]) + tag_objs = relationship('Tag', back_populates='article', + cascade='all,delete-orphan', + lazy=False, + foreign_keys='[Tag.article_id]') + tags = association_proxy('tag_objs', 'text') # index article_uid_cluid = Index('user_id', 'cluster_id') @@ -49,7 +55,11 @@ def _fields_base_write(): @staticmethod def _fields_base_read(): return {'id', 'entry_id', 'link', 'title', 'content', 'date', - 'retrieved_date', 'user_id'} + 'retrieved_date', 'user_id', 'tags'} + + @staticmethod + def _fields_api_write(): + return {'tags'} def __repr__(self): return " Date: Mon, 10 Oct 2016 22:40:36 +0200 Subject: [PATCH 023/164] fixing littles post deployement bug * waiting on crawler improved * ignoring empty entries * folding all categories fixed * small fix in the feed creation --- src/crawler/http_crawler.py | 31 ++++++++++++++----------- src/lib/article_utils.py | 16 +++++++------ src/manager.py | 5 ++-- src/tests/crawler_test.py | 2 +- src/tests/views/admin_test.py | 2 -- src/web/js/stores/MenuStore.js | 2 +- src/web/js/stores/NotificationsStore.js | 7 +----- src/web/views/feed.py | 2 +- 8 files changed, 33 insertions(+), 34 deletions(-) diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py index 2bcc41858..263bc9941 100644 --- a/src/crawler/http_crawler.py +++ b/src/crawler/http_crawler.py @@ -56,25 +56,29 @@ def query_jarr(self, method, urn, data=None): self._futures.append(future) return future - def 
diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py
index 2bcc41858..263bc9941 100644
--- a/src/crawler/http_crawler.py
+++ b/src/crawler/http_crawler.py
@@ -56,25 +56,29 @@ def query_jarr(self, method, urn, data=None):
         self._futures.append(future)
         return future

-    def wait(self, max_wait=300, checks=10, wait_for=5):
-        checked, second_waited = 0, 0
+    def wait(self, max_wait=600, wait_for=2):
+        start = datetime.now()
         max_wait_delta = timedelta(seconds=max_wait)
         while True:
             time.sleep(wait_for)
-            if datetime.now() - start > max_wait_delta:
-                logger.warn('Exiting after %d seconds', second_waited)
+            # checking not thread is still running
+            # some thread are running and we are not behind
+            if datetime.now() - start <= max_wait_delta \
+                    and any(fu.running() for fu in self._futures):
+                # let's wait and see if it's not okay next time
+                continue
+            # all thread are done, let's exit
+            if all(fu.done() for fu in self._futures):
                 break
+            # some thread are still running and we're gonna future.wait on 'em
+            wait_minus_passed = max_wait - (datetime.now() - start).seconds
+            if wait_minus_passed > 0:
+                max_wait = wait_minus_passed
             try:  # no idea why wait throw ValueError around
-                not_done = len(wait(self._futures, timeout=max_wait).not_done)
+                wait(self._futures, timeout=max_wait)
             except ValueError:
-                not_done = 1
-            if not_done != 0:
-                checked = 0
-                continue
-            checked += 1
-            if checked >= checks:
-                break
+                logger.exception('something bad happened:')
+                break


 class JarrUpdater(AbstractCrawler):
@@ -215,9 +219,10 @@ def callback(self, response):
                         self.feed['id'], self.feed['title'])

         ids, entries = [], {}
-        feedparser
         parsed_response = feedparser.parse(response.content)
         for entry in parsed_response['entries']:
+            if not entry:
+                continue
             entry_ids = construct_article(entry, self.feed,
                     {'entry_id', 'feed_id', 'user_id', 'tags'})
             skipped, _, _ = process_filters(self.feed['filters'], entry_ids,
diff --git a/src/lib/article_utils.py b/src/lib/article_utils.py
index 29f19b2c2..a0d71db66 100644
--- a/src/lib/article_utils.py
+++ b/src/lib/article_utils.py
@@ -11,6 +11,7 @@
 from web.lib.article_cleaner import clean_urls

 logger = logging.getLogger(__name__)
+PROCESSED_DATE_KEYS = {'published', 'created', 'updated'}


 def extract_id(entry):
@@ -25,14 +26,14 @@ def construct_article(entry, feed, fields=None):
     article = {}

     def push_in_article(key, value):
-        if fields is None or key in fields:
+        if not fields or key in fields:
             article[key] = value
     push_in_article('feed_id', feed['id'])
     push_in_article('user_id', feed['user_id'])
     push_in_article('entry_id', extract_id(entry))
     push_in_article('retrieved_date', now)
-    if fields is None or 'date' in fields:
-        for date_key in ('published', 'created', 'updated'):
+    if not fields or 'date' in fields:
+        for date_key in PROCESSED_DATE_KEYS:
             if entry.get(date_key):
                 try:
                     article['date'] = dateutil.parser.parse(entry[date_key])\
@@ -69,8 +70,8 @@ def get_article_details(entry):
             # resolves URL behind proxies (like feedproxy.google.com)
             response = jarr_get(article_link)
         except Exception as error:
-            logger.warning("Unable to get the real URL of %s. Won't fix link "
-                           "or title. Error: %s", article_link, error)
+            logger.warn("Unable to get the real URL of %s. Won't fix link "
+                        "or title. Error: %s", article_link, error)
Error: %s", article_link, error) return article_link, article_title or 'No title' article_link = response.url if not article_title: @@ -122,7 +123,7 @@ def process_filters(filters, article, only_actions=None): logger.debug('ignoring filter %r' % filter_) continue - title = article.get('title').lower() + title = article.get('title', '').lower() tags = [tag.lower() for tag in article.get('tags', [])] if filter_type is FiltersType.REGEX: match = re.match(pattern, title) @@ -148,5 +149,6 @@ def process_filters(filters, article, only_actions=None): skipped = True if skipped or read or liked: - logger.info("%r applied on %r", filter_action.value, article['link']) + logger.info("%r applied on %r", filter_action.value, + article.get('link') or article.get('title')) return skipped, read, liked diff --git a/src/manager.py b/src/manager.py index 1f2a6da64..1f148f650 100755 --- a/src/manager.py +++ b/src/manager.py @@ -38,9 +38,8 @@ def fetch(limit=100, retreive_all=False): "Crawl the feeds with the client crawler." from crawler.http_crawler import CrawlerScheduler scheduler = CrawlerScheduler(conf.CRAWLER_LOGIN, conf.CRAWLER_PASSWD) - with scheduler.pool: - scheduler.run(limit=limit, retreive_all=retreive_all) - scheduler.wait() + scheduler.run(limit=limit, retreive_all=retreive_all) + scheduler.wait() @manager.command diff --git a/src/tests/crawler_test.py b/src/tests/crawler_test.py index 71cdd7ed5..e89423725 100644 --- a/src/tests/crawler_test.py +++ b/src/tests/crawler_test.py @@ -13,7 +13,7 @@ class CrawlerTest(JarrFlaskCommon): def setUp(self): super().setUp() - self.wait_params = {'checks': 3, 'wait_for': 2, 'max_wait': 60} + self.wait_params = {'wait_for': 5, 'max_wait': 30} UserController().update({'login': 'admin'}, {'is_api': True}) self._is_secure_served \ = patch('web.lib.article_cleaner.is_secure_served') diff --git a/src/tests/views/admin_test.py b/src/tests/views/admin_test.py index 218128447..06823cfc4 100644 --- a/src/tests/views/admin_test.py +++ b/src/tests/views/admin_test.py @@ -1,5 +1,3 @@ -from io import BytesIO -from mock import patch from tests.base import JarrFlaskCommon from web.controllers import UserController from flask_principal import PermissionDenied diff --git a/src/web/js/stores/MenuStore.js b/src/web/js/stores/MenuStore.js index 3e7095dca..4b4e1eff7 100644 --- a/src/web/js/stores/MenuStore.js +++ b/src/web/js/stores/MenuStore.js @@ -46,7 +46,7 @@ var MenuStore = assign({}, EventEmitter.prototype, { }.bind(this)); }, emitChange: function(all_folded) { - if (all_folded) { + if (all_folded == true || all_folded == false) { this.all_folded = all_folded; } else { this.all_folded = null; diff --git a/src/web/js/stores/NotificationsStore.js b/src/web/js/stores/NotificationsStore.js index 8f9613745..d113414a6 100644 --- a/src/web/js/stores/NotificationsStore.js +++ b/src/web/js/stores/NotificationsStore.js @@ -24,12 +24,7 @@ var NotificationsStore = assign({}, EventEmitter.prototype, { this.notifs = this.notifs.filter(function(notif) {return !notif.read;}); return this.notifs; }, - emitChange: function(all_folded) { - if (all_folded) { - this.all_folded = all_folded; - } else { - this.all_folded = null; - } + emitChange: function() { this.emit(CHANGE_EVENT); }, addChangeListener: function(callback) { diff --git a/src/web/views/feed.py b/src/web/views/feed.py index ce68ad057..ce970126d 100644 --- a/src/web/views/feed.py +++ b/src/web/views/feed.py @@ -34,7 +34,7 @@ def check_feeds(link, site_link): filters = [] if link: filters.append({'link': link}) - if link: + if 
From 6757146660bba02589fe6daeaf89777279c54252 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?=
Date: Mon, 10 Oct 2016 23:30:08 +0200
Subject: [PATCH 024/164] fixing skipping from crawler

---
 src/crawler/http_crawler.py |  3 ++-
 src/lib/article_utils.py    | 11 ++++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py
index 263bc9941..69c0d527d 100644
--- a/src/crawler/http_crawler.py
+++ b/src/crawler/http_crawler.py
@@ -224,7 +224,8 @@ def callback(self, response):
             if not entry:
                 continue
             entry_ids = construct_article(entry, self.feed,
-                    {'entry_id', 'feed_id', 'user_id', 'tags'})
+                    {'title', 'entry_id', 'feed_id', 'user_id', 'tags'},
+                    fetch=False)
             skipped, _, _ = process_filters(self.feed['filters'], entry_ids,
                                             {FiltersAction.SKIP})
             if skipped:
diff --git a/src/lib/article_utils.py b/src/lib/article_utils.py
index a0d71db66..dd1444385 100644
--- a/src/lib/article_utils.py
+++ b/src/lib/article_utils.py
@@ -20,7 +20,7 @@ def extract_id(entry):
     return entry.get('entry_id') or entry.get('id') or entry['link']


-def construct_article(entry, feed, fields=None):
+def construct_article(entry, feed, fields=None, fetch=True):
     "Safe method to transform a feedparser entry into an article"
     now = datetime.utcnow()
     article = {}
@@ -44,10 +44,11 @@ def push_in_article(key, value):
                 break
     push_in_article('content', get_article_content(entry))
     if fields is None or {'link', 'title'}.intersection(fields):
-        link, title = get_article_details(entry)
+        link, title = get_article_details(entry, fetch)
         push_in_article('link', link)
         push_in_article('title', title)
-        push_in_article('content', clean_urls(article['content'], link))
+        if 'content' in article:
+            push_in_article('content', clean_urls(article['content'], link))
     push_in_article('tags', [tag.get('term').strip()
                              for tag in entry.get('tags', [])])
     return article
@@ -62,10 +63,10 @@ def get_article_content(entry):
     return content


-def get_article_details(entry):
+def get_article_details(entry, fetch=True):
     article_link = entry.get('link')
     article_title = html.unescape(entry.get('title', ''))
-    if conf.CRAWLER_RESOLV and article_link or not article_title:
+    if fetch and conf.CRAWLER_RESOLV and article_link or not article_title:
         try:
             # resolves URL behind proxies (like feedproxy.google.com)
             response = jarr_get(article_link)
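The new fetch flag means skip filters can now be evaluated without any network
round-trip. A sketch of the intended two-pass usage, reusing the names from
the diff above:

    # cheap pass: just enough fields to evaluate SKIP filters, no HTTP
    entry_ids = construct_article(
            entry, feed,
            {'title', 'entry_id', 'feed_id', 'user_id', 'tags'}, fetch=False)
    skipped, _, _ = process_filters(feed['filters'], entry_ids,
                                    {FiltersAction.SKIP})
    if not skipped:
        # full pass: resolves link and title over HTTP when configured to
        article = construct_article(entry, feed)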
From fe60cb05cc3ccc81a6f42683ed094367b6f2ca85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3%A7ois=20Schmidts?=
Date: Tue, 11 Oct 2016 17:09:55 +0200
Subject: [PATCH 025/164] handling URLs without a scheme in feeds

---
 src/bootstrap.py            |  2 +-
 src/crawler/http_crawler.py | 10 +++++++---
 src/lib/article_utils.py    | 22 +++++++++++++++++++---
 src/lib/utils.py            | 10 ++++++----
 4 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/src/bootstrap.py b/src/bootstrap.py
index bb2914a0a..1765ca7e0 100644
--- a/src/bootstrap.py
+++ b/src/bootstrap.py
@@ -28,7 +28,7 @@
 def set_logging(log_path=None, log_level=logging.INFO, modules=(),
                 log_format='%(asctime)s %(levelname)s %(message)s'):
     if not modules:
-        modules = ('root', 'bootstrap', 'runserver',
+        modules = ('root', 'bootstrap', 'runserver', 'lib',
                    'web', 'crawler', 'manager', 'plugins')
     if log_path:
         handler = logging.FileHandler(log_path)
diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py
index 69c0d527d..d1d5c03a7 100644
--- a/src/crawler/http_crawler.py
+++ b/src/crawler/http_crawler.py
@@ -56,9 +56,10 @@ def query_jarr(self, method, urn, data=None):
         self._futures.append(future)
         return future

-    def wait(self, max_wait=600, wait_for=2):
-        start = datetime.now()
+    def wait(self, max_wait=600, wait_for=2, checks=2):
+        start, checked = datetime.now(), 0
         max_wait_delta = timedelta(seconds=max_wait)
+        time.sleep(wait_for * 3)
         while True:
             time.sleep(wait_for)
             # checking not thread is still running
@@ -67,6 +68,9 @@ def wait(self, max_wait=600, wait_for=2):
                     and any(fu.running() for fu in self._futures):
                 # let's wait and see if it's not okay next time
                 continue
+            if checks == checked:
+                checked += 1
+                continue
             # all thread are done, let's exit
             if all(fu.done() for fu in self._futures):
                 break
@@ -155,7 +159,7 @@ def callback(self, response):
             return  # meaningless if no new article has been published
         logger.info('%r %r - pushing feed attrs %r',
                     self.feed['id'], self.feed['title'],
-                    {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
+                    {key: "%r -> %r" % (self.feed.get(key), up_feed[key])
                      for key in up_feed if up_feed[key] != self.feed.get(key)})
         self.query_jarr('put', 'feed/%d' % self.feed['id'], up_feed)
diff --git a/src/lib/article_utils.py b/src/lib/article_utils.py
index dd1444385..2009be509 100644
--- a/src/lib/article_utils.py
+++ b/src/lib/article_utils.py
@@ -3,6 +3,8 @@
 import logging
 from enum import Enum
 import dateutil.parser
+from urllib.parse import urlsplit, urlunsplit, SplitResult
+from requests.exceptions import MissingSchema
 from datetime import datetime, timezone

 from bs4 import BeautifulSoup, SoupStrainer
@@ -69,10 +71,24 @@ def get_article_details(entry, fetch=True):
     if fetch and conf.CRAWLER_RESOLV and article_link or not article_title:
         try:
             # resolves URL behind proxies (like feedproxy.google.com)
-            response = jarr_get(article_link)
+            response = jarr_get(article_link, timeout=5)
+        except MissingSchema:
+            split, failed = urlsplit(article_link), False
+            for scheme in 'https', 'http':
+                new_link = urlunsplit(SplitResult(scheme, *split[1:]))
+                try:
+                    response = jarr_get(new_link, timeout=5)
+                except Exception as error:
+                    failed = True
+                    continue
+                failed = False
+                article_link = new_link
+                break
+            if failed:
+                return article_link, article_title or 'No title'
         except Exception as error:
-            logger.warn("Unable to get the real URL of %s. Won't fix "
-                        "link or title. Error: %s", article_link, error)
+            logger.info("Unable to get the real URL of %s. Won't fix "
+                        "link or title. Error: %s", article_link, error)
             return article_link, article_title or 'No title'
         article_link = response.url
     if not article_title:
diff --git a/src/lib/utils.py b/src/lib/utils.py
index 187f7e636..e6226eb32 100644
--- a/src/lib/utils.py
+++ b/src/lib/utils.py
@@ -80,7 +80,9 @@ def redirect_url(default='home'):
     return request.args.get('next') or request.referrer or url_for(default)


-def jarr_get(url):
-    return requests.get(url, verify=False, allow_redirects=True,
-                        timeout=conf.CRAWLER_TIMEOUT,
-                        headers={'User-Agent': conf.CRAWLER_USER_AGENT})
+def jarr_get(url, **kwargs):
+    request_kwargs = {'verify': False, 'allow_redirects': True,
+                      'timeout': conf.CRAWLER_TIMEOUT,
+                      'headers': {'User-Agent': conf.CRAWLER_USER_AGENT}}
+    request_kwargs.update(kwargs)
+    return requests.get(url, **request_kwargs)
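The retry logic above boils down to: if requests refuses a schemeless link,
try it again with https and then http prepended. A condensed sketch of just
that fallback, using requests directly instead of the project's jarr_get
wrapper:

    import requests
    from requests.exceptions import MissingSchema
    from urllib.parse import SplitResult, urlsplit, urlunsplit

    def get_with_scheme_fallback(url, timeout=5):
        try:
            return requests.get(url, timeout=timeout)
        except MissingSchema:
            split = urlsplit(url)  # scheme field is empty for '//host/path'
            for scheme in ('https', 'http'):
                try:
                    return requests.get(
                            urlunsplit(SplitResult(scheme, *split[1:])),
                            timeout=timeout)
                except requests.RequestException:
                    continue
        return None  # caller falls back to the raw link and a default title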
Error: %s", article_link, error) return article_link, article_title or 'No title' article_link = response.url if not article_title: diff --git a/src/lib/utils.py b/src/lib/utils.py index 187f7e636..e6226eb32 100644 --- a/src/lib/utils.py +++ b/src/lib/utils.py @@ -80,7 +80,9 @@ def redirect_url(default='home'): return request.args.get('next') or request.referrer or url_for(default) -def jarr_get(url): - return requests.get(url, verify=False, allow_redirects=True, - timeout=conf.CRAWLER_TIMEOUT, - headers={'User-Agent': conf.CRAWLER_USER_AGENT}) +def jarr_get(url, **kwargs): + request_kwargs = {'verify': False, 'allow_redirects': True, + 'timeout': conf.CRAWLER_TIMEOUT, + 'headers': {'User-Agent': conf.CRAWLER_USER_AGENT}} + request_kwargs.update(kwargs) + return requests.get(url, **request_kwargs) From dd84cf37f93ba2b520340c844be354298d95eac2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Wed, 12 Oct 2016 01:01:03 +0200 Subject: [PATCH 026/164] improving feed deletion, no more browsing all the articles --- src/web/controllers/feed.py | 38 ++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/src/web/controllers/feed.py b/src/web/controllers/feed.py index 1c975dfd3..af90df4b1 100644 --- a/src/web/controllers/feed.py +++ b/src/web/controllers/feed.py @@ -1,6 +1,6 @@ import logging from sqlalchemy import and_ -from sqlalchemy.sql import select, update +from sqlalchemy.sql import select, update, delete from datetime import datetime, timedelta from bootstrap import db, conf, SQLITE_ENGINE @@ -131,7 +131,39 @@ def update(self, filters, attrs, *args, **kwargs): return super().update(filters, attrs, *args, **kwargs) def delete(self, obj_id): + from web.controllers.cluster import ClusterController + feed = self.get(id=obj_id) actrl = self.__get_art_contr() - for article in self.get(id=obj_id).articles: - actrl.remove_from_all_clusters(article.id) + clu_ctrl = ClusterController(self.user_id) + + # removing back ref from cluster to article + clu_ctrl.update({'main_article_id__in': actrl.read(feed_id=obj_id) + .with_entities('id')}, + {'main_article_id': None}) + + def select_art(col): + return select([col]).where(and_(Cluster.id == Article.cluster_id, + Article.user_id == feed.user_id))\ + .order_by(Article.date.asc()).limit(1) + + # removing articles + db.session.execute(delete(Article).where( + and_(Article.feed_id == feed.id, + Article.user_id == feed.user_id))) + + # reclustering + clu_ctrl.update({'main_article_id': None}, + {'main_title': select_art(Article.title), + 'main_article_id': select_art(Article.id), + 'main_feed_title': select([Feed.title]) + .where(and_( + Cluster.id == Article.cluster_id, + Article.user_id == feed.user_id, + Feed.id == Article.feed_id, + Feed.user_id == feed.user_id)) + .order_by(Article.date.asc()).limit(1)}) + # removing remaing clusters + db.session.execute(delete(Cluster).where( + and_(Cluster.user_id == feed.user_id, + Cluster.main_article_id.__eq__(None)))) return super().delete(obj_id) From f39e2a9840a9ea9d66f2ff2c780635e07653987e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Wed, 12 Oct 2016 01:48:11 +0200 Subject: [PATCH 027/164] testing missing scheme handling --- src/crawler/http_crawler.py | 28 +++++++--------- src/lib/article_utils.py | 16 ++++++++- src/tests/crawler_test.py | 8 +++++ src/tests/libs/article_utils_test.py | 49 +++++++++++++++++++++++----- 4 files changed, 76 insertions(+), 25 deletions(-) diff --git a/src/crawler/http_crawler.py 
b/src/crawler/http_crawler.py index d1d5c03a7..914504c50 100644 --- a/src/crawler/http_crawler.py +++ b/src/crawler/http_crawler.py @@ -24,7 +24,7 @@ from requests_futures.sessions import FuturesSession from lib.utils import default_handler, to_hash from lib.feed_utils import construct_feed_from, is_parsing_ok -from lib.article_utils import construct_article, process_filters, FiltersAction +from lib.article_utils import construct_article, get_skip_and_ids logger = logging.getLogger(__name__) logging.captureWarnings(True) @@ -59,7 +59,7 @@ def query_jarr(self, method, urn, data=None): def wait(self, max_wait=600, wait_for=2, checks=2): start, checked = datetime.now(), 0 max_wait_delta = timedelta(seconds=max_wait) - time.sleep(wait_for * 3) + time.sleep(wait_for * 2) while True: time.sleep(wait_for) # checking not thread is still running @@ -122,11 +122,6 @@ def callback(self, response): entries[-1]['user_id'], id_to_create) self.query_jarr('post', 'articles', entries) - logger.debug('%r %r - updating feed etag %r last_mod %r', - self.feed['id'], self.feed['title'], - self.headers.get('etag', ''), - self.headers.get('last-modified', '')) - up_feed = {'etag': self.headers.get('etag', ''), 'last_modified': self.headers.get('last-modified', strftime('%a, %d %b %Y %X %Z', gmtime()))} @@ -150,6 +145,9 @@ def callback(self, response): # re-getting that feed earlier since new entries appeared if article_created: up_feed['last_retrieved'] = datetime.utcnow() + else: + logger.info('%r %r - all article matched in db, adding nothing', + self.feed['id'], self.feed['title']) diff_keys = {key for key in up_feed if up_feed[key] != self.feed.get(key)} @@ -224,24 +222,22 @@ def callback(self, response): ids, entries = [], {} parsed_response = feedparser.parse(response.content) + skipped_list = [] for entry in parsed_response['entries']: if not entry: continue - entry_ids = construct_article(entry, self.feed, - {'title', 'entry_id', 'feed_id', 'user_id', 'tags'}, - fetch=False) - skipped, _, _ = process_filters(self.feed['filters'], entry_ids, - {FiltersAction.SKIP}) + skipped, entry_ids = get_skip_and_ids(entry, self.feed) if skipped: + skipped_list.append(entry_ids) logger.debug('%r %r - skipping article', self.feed['id'], self.feed['title']) continue - del entry_ids['tags'] entries[tuple(sorted(entry_ids.items()))] = entry ids.append(entry_ids) - if not ids: - logger.info('%r %r - all articles skipped, adding nothing', - self.feed['id'], self.feed['title']) + if not ids and skipped_list: + logger.debug('%r %r - nothing to add (skipped %r) %r', + self.feed['id'], self.feed['title'], skipped_list, + parsed_response) return logger.debug('%r %r - found %d entries %r', self.feed['id'], self.feed['title'], len(ids), ids) diff --git a/src/lib/article_utils.py b/src/lib/article_utils.py index 2009be509..c719217c1 100644 --- a/src/lib/article_utils.py +++ b/src/lib/article_utils.py @@ -139,7 +139,12 @@ def process_filters(filters, article, only_actions=None): if filter_action not in only_actions: logger.debug('ignoring filter %r' % filter_) continue - + if filter_action in {FiltersType.REGEX, FiltersType.MATCH, + FiltersType.EXACT_MATCH} and 'title' not in article: + continue + if filter_action in {FiltersType.TAG_MATCH, FiltersType.TAG_CONTAINS} \ + and 'tags' not in article: + continue title = article.get('title', '').lower() tags = [tag.lower() for tag in article.get('tags', [])] if filter_type is FiltersType.REGEX: @@ -169,3 +174,12 @@ def process_filters(filters, article, only_actions=None): logger.info("%r 
applied on %r", filter_action.value, article.get('link') or article.get('title')) return skipped, read, liked + + +def get_skip_and_ids(entry, feed): + entry_ids = construct_article(entry, feed, + {'entry_id', 'feed_id', 'user_id'}, fetch=False) + skipped, _, _ = process_filters(feed['filters'], + construct_article(entry, feed, {'title', 'tags'}, fetch=False), + {FiltersAction.SKIP}) + return skipped, entry_ids diff --git a/src/tests/crawler_test.py b/src/tests/crawler_test.py index e89423725..e487d44de 100644 --- a/src/tests/crawler_test.py +++ b/src/tests/crawler_test.py @@ -114,6 +114,14 @@ def test_no_add_feed_skip(self, jarr_updated_callback): resp = self._api('get', 'articles', data={'limit': 1000}, user='admin') self.assertEquals(36, len(resp.json())) FeedController().update({}, {'filters': [{"type": "tag contains", + "action on": "match", + "pattern": "pattern5", + "action": "skipped"}, + {"type": "simple match", + "action on": "match", + "pattern": "pattern5", + "action": "mark as read"}, + {"type": "regex", "action on": "match", "pattern": "pattern5", "action": "skipped"}]}) diff --git a/src/tests/libs/article_utils_test.py b/src/tests/libs/article_utils_test.py index ee584e74d..f914f02c6 100644 --- a/src/tests/libs/article_utils_test.py +++ b/src/tests/libs/article_utils_test.py @@ -1,30 +1,63 @@ from tests.base import JarrFlaskCommon import json from mock import patch +from requests.exceptions import MissingSchema from lib.article_utils import construct_article class ConstructArticleTest(JarrFlaskCommon): + response_url = '//www.pariszigzag.fr/paris-insolite-secret/'\ + 'les-plus-belles-boulangeries-de-paris' - @patch('lib.article_utils.jarr_get') - def test_missing_title(self, jarr_get): + def setUp(self): + self._jarr_get_patch = patch('lib.article_utils.jarr_get') + self.jarr_get_patch = self._jarr_get_patch.start() + + def tearDown(self): + self._jarr_get_patch.stop() + + @staticmethod + def get_entry(): + with open('src/tests/fixtures/article.json') as fd: + return json.load(fd) + + @staticmethod + def get_response(scheme='http:'): class Response: @property def url(self): - return 'http://www.pariszigzag.fr/paris-insolite-secret/'\ - 'les-plus-belles-boulangeries-de-paris' + return scheme + ConstructArticleTest.response_url + @property def content(self): with open('src/tests/fixtures/article.html') as fd: return fd.read() - jarr_get.return_value = Response() - with open('src/tests/fixtures/article.json') as fd: - entry = json.load(fd) + return Response() + + def test_missing_title(self): + self.jarr_get_patch.return_value = self.get_response('http:') + article = construct_article(self.get_entry(), {'id': 1, 'user_id': 1}) + self.assertEquals('http://www.pariszigzag.fr/?p=56413', + article['entry_id']) + self.assertEquals('http:' + self.response_url, article['link']) + self.assertEquals('Les plus belles boulangeries de Paris', + article['title']) + self.assertEquals(1, article['user_id']) + self.assertEquals(1, article['feed_id']) + + def test_missing_scheme(self): + response = self.get_response('https:') + self.jarr_get_patch.side_effect = [MissingSchema, response] + entry = self.get_entry() + entry['link'] = entry['link'][5:] article = construct_article(entry, {'id': 1, 'user_id': 1}) + + self.assertEquals(2, self.jarr_get_patch.call_count) + self.assertEquals(response.url, self.jarr_get_patch.call_args[0][0]) self.assertEquals('http://www.pariszigzag.fr/?p=56413', article['entry_id']) - self.assertEquals(Response().url, article['link']) + self.assertEquals(response.url, 
article['link']) self.assertEquals('Les plus belles boulangeries de Paris', article['title']) self.assertEquals(1, article['user_id']) From 1b1a56cef416d0001f3c58e0e10b134b172d1dea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Wed, 12 Oct 2016 09:55:22 +0200 Subject: [PATCH 028/164] adding tests --- src/tests/controllers/feed_test.py | 26 ++++++++++++++++++++++++++ src/tests/fixtures/filler.py | 23 +++++++++++++---------- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/tests/controllers/feed_test.py b/src/tests/controllers/feed_test.py index 9055e85a9..9561296ec 100644 --- a/src/tests/controllers/feed_test.py +++ b/src/tests/controllers/feed_test.py @@ -13,6 +13,32 @@ def test_delete(self): self.assertEquals(0, ClusterController(2).read().count()) self.assertEquals(0, ArticleController(2).read().count()) + def test_delete_main_cluster_handling(self): + clu = ClusterController().get(id=10) + old_title = clu.main_title + old_feed_title, old_art_id = clu.main_feed_title, clu.main_article_id + self.assertEquals(2, len(clu.articles)) + FeedController(clu.user_id).delete(clu.main_article.feed_id) + new_cluster = ClusterController(clu.user_id).get(id=clu.id) + self.assertEquals(1, len(new_cluster.articles)) + self.assertNotEquals(old_title, new_cluster.main_title) + self.assertNotEquals(old_feed_title, new_cluster.main_feed_title) + self.assertNotEquals(old_art_id, new_cluster.main_article_id) + + def test_delete_cluster_handling(self): + clu = ClusterController().get(id=10) + old_title = clu.main_title + old_feed_title, old_art_id = clu.main_feed_title, clu.main_article_id + self.assertEquals(2, len(clu.articles)) + FeedController(clu.user_id).delete( # deleting not main article + next(art.feed_id for art in clu.articles + if art.id != clu.main_article_id)) + new_cluster = ClusterController(clu.user_id).get(id=clu.id) + self.assertEquals(1, len(new_cluster.articles)) + self.assertEquals(old_title, new_cluster.main_title) + self.assertEquals(old_feed_title, new_cluster.main_feed_title) + self.assertEquals(old_art_id, new_cluster.main_article_id) + def test_feed_rights(self): feed = FeedController(2).read()[0].dump() self.assertTrue(3, diff --git a/src/tests/fixtures/filler.py b/src/tests/fixtures/filler.py index d6abba09e..e384da592 100644 --- a/src/tests/fixtures/filler.py +++ b/src/tests/fixtures/filler.py @@ -1,3 +1,4 @@ +from datetime import datetime, timedelta from manager import db_create, db_empty from web.controllers import UserController, CategoryController, \ FeedController, ArticleController @@ -13,19 +14,20 @@ def populate_db(): user1, user2 = [ucontr.create(login=name, email="%s@test.te" % name, password=name) for name in ["user1", "user2"]] - - def to_name(u, c=None, f=None, a=None, *args): - string = u.login - if c: - string += " cat%s" % c - if f is not None: - string += " feed%s" % f - if a is not None: - string += " art%s" % a - return string + ''.join(args) + now = datetime.now() for k in range(2): article_total = 0 + + def to_name(u, c=None, f=None, a=None, *args): + string = "i%d %s" % (k, u.login) + if c: + string += " cat%s" % c + if f is not None: + string += " feed%s" % f + if a is not None: + string += " art%s" % a + return string + ''.join(args) for user in (user1, user2): for i in range(3): cat_id = None @@ -44,6 +46,7 @@ def to_name(u, c=None, f=None, a=None, *args): tags=[to_name(user, i, i, j, '1'), to_name(user, i, i, j, '2')], category_id=cat_id, title=entry, + date=now + timedelta(seconds=k), content="content %d" % 
article_total) def reset_db(): From 3a5b1d4bafd960a34caaad6624a386bdc51cd238 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Wed, 12 Oct 2016 10:35:17 +0200 Subject: [PATCH 029/164] fix for reading cluster when not admin --- src/web/models/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/web/models/cluster.py b/src/web/models/cluster.py index f531b92e0..a13c1ed39 100644 --- a/src/web/models/cluster.py +++ b/src/web/models/cluster.py @@ -72,7 +72,7 @@ def _fields_base_write(): def _fields_base_read(): return {'id', 'user_id', 'categories_id', 'feeds_id', 'main_link', 'main_title', 'main_feed_title', 'main_date', - 'created_date', 'cluster_type', 'articles'} + 'created_date', 'cluster_type', 'articles', 'main_article_id'} def __repr__(self): return "" \ From ccb27e52ab947363323e4109904d59bc919471a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Wed, 12 Oct 2016 11:49:49 +0200 Subject: [PATCH 030/164] getting rid of dead code --- src/web/var/english-stop-words.txt | 311 ------------------ src/web/var/french-stop-words.txt | 176 ---------- src/web/var/generate-top-words-list.sh | 8 - .../stop_words/english-stop-words-list.txt | 1 - .../var/stop_words/french-stop-words-list.txt | 1 - 5 files changed, 497 deletions(-) delete mode 100644 src/web/var/english-stop-words.txt delete mode 100644 src/web/var/french-stop-words.txt delete mode 100755 src/web/var/generate-top-words-list.sh delete mode 100644 src/web/var/stop_words/english-stop-words-list.txt delete mode 100644 src/web/var/stop_words/french-stop-words-list.txt diff --git a/src/web/var/english-stop-words.txt b/src/web/var/english-stop-words.txt deleted file mode 100644 index 497a1f965..000000000 --- a/src/web/var/english-stop-words.txt +++ /dev/null @@ -1,311 +0,0 @@ - - | An English stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | Many of the forms below are quite rare (e.g. "yourselves") but included for - | completeness. - - | PRONOUNS FORMS - | 1st person sing - -i | subject, always in upper case of course - -me | object -my | possessive adjective - | the possessive pronoun `mine' is best suppressed, because of the - | sense of coal-mine etc. -myself | reflexive - | 1st person plural -we | subject - -| us | object - | care is required here because US = United States. It is usually - | safe to remove it if it is in lower case. -our | possessive adjective -ours | possessive pronoun -ourselves | reflexive - | second person (archaic `thou' forms not included) -you | subject and object -your | possessive adjective -yours | possessive pronoun -yourself | reflexive (singular) -yourselves | reflexive (plural) - | third person singular -he | subject -him | object -his | possessive adjective and pronoun -himself | reflexive - -she | subject -her | object and possessive adjective -hers | possessive pronoun -herself | reflexive - -it | subject and object -its | possessive adjective -itself | reflexive - | third person plural -they | subject -them | object -their | possessive adjective -theirs | possessive pronoun -themselves | reflexive - | other forms (demonstratives, interrogatives) -what -which -who -whom -this -that -these -those - - | VERB FORMS (using F.R. 
Palmer's nomenclature) - | BE -am | 1st person, present -is | -s form (3rd person, present) -are | present -was | 1st person, past -were | past -be | infinitive -been | past participle -being | -ing form - | HAVE -have | simple -has | -s form -had | past -having | -ing form - | DO -do | simple -does | -s form -did | past -doing | -ing form - - | The forms below are, I believe, best omitted, because of the significant - | homonym forms: - - | He made a WILL - | old tin CAN - | merry month of MAY - | a smell of MUST - | fight the good fight with all thy MIGHT - - | would, could, should, ought might however be included - - | | AUXILIARIES - | | WILL - |will - -would - - | | SHALL - |shall - -should - - | | CAN - |can - -could - - | | MAY - |may - |might - | | MUST - |must - | | OUGHT - -ought - - | COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing - | pronoun + verb - -i'm -you're -he's -she's -it's -we're -they're -i've -you've -we've -they've -i'd -you'd -he'd -she'd -we'd -they'd -i'll -you'll -he'll -she'll -we'll -they'll - - | verb + negation - -isn't -aren't -wasn't -weren't -hasn't -haven't -hadn't -doesn't -don't -didn't - - | auxiliary + negation - -won't -wouldn't -shan't -shouldn't -can't -cannot -couldn't -mustn't - - | miscellaneous forms - -let's -that's -who's -what's -here's -there's -when's -where's -why's -how's - - | rarer forms - - | daren't needn't - - | doubtful forms - - | oughtn't mightn't - - | ARTICLES -a -an -the - - | THE REST (Overlap among prepositions, conjunctions, adverbs etc is so - | high, that classification is pointless.) -and -but -if -or -because -as -until -while - -of -at -by -for -with -about -against -between -into -through -during -before -after -above -below -to -from -up -down -in -out -on -off -over -under - -again -further -then -once - -here -there -when -where -why -how - -all -any -both -each -few -more -most -other -some -such - -no -nor -not -only -own -same -so -than -too -very - - | Just for the record, the following words are among the commonest in English - - | one - | every - | least - | less - | many - | now - | ever - | never - | say - | says - | said - | also - | get - | go - | goes - | just - | made - | make - | put - | see - | seen - | whether - | like - | well - | back - | even - | still - | way - | take - | since - | another - | however - | two - | three - | four - | five - | first - | second - | new - | old - | high - | long \ No newline at end of file diff --git a/src/web/var/french-stop-words.txt b/src/web/var/french-stop-words.txt deleted file mode 100644 index 08a2f5d74..000000000 --- a/src/web/var/french-stop-words.txt +++ /dev/null @@ -1,176 +0,0 @@ - - | A French stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. 
- -au | a + le -aux | a + les -avec | with -ce | this -ces | these -dans | with -de | of -des | de + les -du | de + le -elle | she -en | `of them' etc -et | and -eux | them -il | he -je | I -la | the -le | the -leur | their -lui | him -ma | my (fem) -mais | but -me | me -même | same; as in moi-même (myself) etc -mes | me (pl) -moi | me -mon | my (masc) -ne | not -nos | our (pl) -notre | our -nous | we -on | one -ou | where -par | by -pas | not -pour | for -qu | que before vowel -que | that -qui | who -sa | his, her (fem) -se | oneself -ses | his (pl) -son | his, her (masc) -sur | on -ta | thy (fem) -te | thee -tes | thy (pl) -toi | thee -ton | thy (masc) -tu | thou -un | a -une | a -vos | your (pl) -votre | your -vous | you - - | single letter forms - -c | c' -d | d' -j | j' -l | l' -à | to, at -m | m' -n | n' -s | s' -t | t' -y | there - - | forms of être (not including the infinitive): -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était -étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses -fût -fussions -fussiez -fussent - - | forms of avoir (not including the infinitive): -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent - - | Later additions (from Jean-Christophe Deschamps) -ceci | this -celà | that -cet | this -cette | this -ici | here -ils | they -les | the (pl) -leurs | their (pl) -quel | which -quels | which -quelle | which -quelles | which -sans | without -soi | oneself \ No newline at end of file diff --git a/src/web/var/generate-top-words-list.sh b/src/web/var/generate-top-words-list.sh deleted file mode 100755 index 2a87e1472..000000000 --- a/src/web/var/generate-top-words-list.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh - -if test $# != 2 ; then - echo No input files given 1>&2 - exit 1 -fi - -awk 'BEGIN{FS = " "} { if ($1 ~ /^[A-Za-z]/) {print $1}}' $1 | sort | tr '\n' ';' > $2 \ No newline at end of file diff --git a/src/web/var/stop_words/english-stop-words-list.txt b/src/web/var/stop_words/english-stop-words-list.txt deleted file mode 100644 index caa26aaf7..000000000 --- a/src/web/var/stop_words/english-stop-words-list.txt +++ /dev/null @@ -1 +0,0 @@ -a;about;above;after;again;against;all;am;an;and;any;are;aren't;as;at;be;because;been;before;being;below;between;both;but;by;cannot;can't;could;couldn't;did;didn't;do;does;doesn't;doing;don't;down;during;each;few;for;from;further;had;hadn't;has;hasn't;have;haven't;having;he;he'd;he'll;her;here;here's;hers;herself;he's;him;himself;his;how;how's;i;i'd;if;i'll;i'm;in;into;is;isn't;it;its;it's;itself;i've;let's;me;more;most;mustn't;my;myself;no;nor;not;of;off;on;once;only;or;other;ought;our;ours;ourselves;out;over;own;same;shan't;she;she'd;she'll;she's;should;shouldn't;slashdot;so;some;such;than;that;that's;the;their;theirs;them;themselves;then;there;there's;these;they;they'd;they'll;they're;they've;this;those;through;to;too;under;until;up;very;was;wasn't;we;we'd;we'll;were;we're;weren't;we've;what;what's;when;when's;where;where's;which;while;who;whom;who's;why;why's;with;won't;would;wouldn't;writes;you;you'd;you'll;your;you're;yours;yourself;yourselves;you've; diff --git a/src/web/var/stop_words/french-stop-words-list.txt 
b/src/web/var/stop_words/french-stop-words-list.txt deleted file mode 100644 index a6a36c799..000000000 --- a/src/web/var/stop_words/french-stop-words-list.txt +++ /dev/null @@ -1 +0,0 @@ -à;ai;aie;aient;aies;ait;as;au;aura;aurai;auraient;aurais;aurait;auras;aurez;auriez;aurions;aurons;auront;aux;avaient;avais;avait;avec;avez;aviez;avions;avons;ayant;ayez;ayons;c;ce;ceci;celà;ces;cet;cette;d;dans;de;des;du;elle;en;es;est;et;étaient;étais;était;étant;été;étée;étées;êtes;étés;étiez;étions;eu;eue;eues;eûmes;eurent;eus;eusse;eussent;eusses;eussiez;eussions;eut;eût;eûtes;eux;fûmes;furent;fus;fusse;fussent;fusses;fussiez;fussions;fut;fût;fûtes;ici;il;ils;j;je;l;la;le;les;leur;leurs;lui;m;ma;mais;me;même;mes;moi;mon;n;ne;nos;notre;nous;on;ont;ou;par;pas;pour;qu;que;quel;quelle;quelles;quels;qui;s;sa;sans;se;sera;serai;seraient;serais;serait;seras;serez;seriez;serions;serons;seront;ses;soi;soient;sois;soit;sommes;son;sont;soyez;soyons;suis;sur;t;ta;te;tes;toi;ton;tu;toujours;un;une;vos;votre;vous;y; From cadd132c87ee05058e9d196151fa47565bb41c2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Wed, 12 Oct 2016 17:41:16 +0200 Subject: [PATCH 031/164] miscellaneous fixes * fixing building feed: * won't override link * will update all other attributes * won't erase icon_url with broken ones * feed dict instance isn't modified anymore * improving the crawler waiting method... again * improving the crawler feed updating * adding test coverage * fixing use cases when adding feed through bookmarklet * fixing clustering: not clustering article too far away in time --- src/crawler/http_crawler.py | 92 ++++++++++++++-------------- src/lib/feed_utils.py | 87 +++++++++++++------------- src/lib/utils.py | 2 +- src/tests/crawler_test.py | 2 +- src/tests/libs/article_utils_test.py | 11 ++-- src/tests/libs/feed_utils_test.py | 4 +- src/web/controllers/cluster.py | 5 ++ src/web/controllers/feed.py | 2 +- src/web/views/feed.py | 2 +- 9 files changed, 110 insertions(+), 97 deletions(-) diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py index 914504c50..daa1318a7 100644 --- a/src/crawler/http_crawler.py +++ b/src/crawler/http_crawler.py @@ -12,7 +12,6 @@ to create the missing entries """ -import html import time import json import logging @@ -28,12 +27,12 @@ logger = logging.getLogger(__name__) logging.captureWarnings(True) +FUTURES = [] class AbstractCrawler: pool = ThreadPoolExecutor(max_workers=conf.CRAWLER_NBWORKER) session = FuturesSession(executor=pool) - _futures = [] def __init__(self, auth): self.auth = auth @@ -53,10 +52,10 @@ def query_jarr(self, method, urn, data=None): data=json.dumps(data, default=default_handler), headers={'Content-Type': 'application/json', 'User-Agent': conf.CRAWLER_USER_AGENT}) - self._futures.append(future) + FUTURES.append(future) return future - def wait(self, max_wait=600, wait_for=2, checks=2): + def wait(self, max_wait=600, wait_for=2, checks=10): start, checked = datetime.now(), 0 max_wait_delta = timedelta(seconds=max_wait) time.sleep(wait_for * 2) @@ -65,21 +64,21 @@ def wait(self, max_wait=600, wait_for=2, checks=2): # checking not thread is still running # some thread are running and we are not behind if datetime.now() - start <= max_wait_delta \ - and any(fu.running() for fu in self._futures): + and any(fu.running() for fu in FUTURES): # let's wait and see if it's not okay next time continue - if checks == checked: + if checks > checked: checked += 1 continue # all thread are done, let's exit - if all(fu.done() for fu in 
self._futures): + if all(fu.done() for fu in FUTURES): break # some thread are still running and we're gonna future.wait on 'em wait_minus_passed = max_wait - (datetime.now() - start).seconds if wait_minus_passed > 0: max_wait = wait_minus_passed try: # no idea why wait throw ValueError around - wait(self._futures, timeout=max_wait) + wait(FUTURES, timeout=max_wait) except ValueError: logger.exception('something bad happened:') break @@ -114,51 +113,53 @@ def callback(self, response): entries = [] for id_to_create in results: article_created = True - entries.append(construct_article( + entry = construct_article( self.entries[tuple(sorted(id_to_create.items()))], - self.feed)) + self.feed) logger.info('%r %r - creating %r for %r - %r', self.feed['id'], - self.feed['title'], entries[-1]['title'], - entries[-1]['user_id'], id_to_create) + self.feed['title'], entry.get('title'), + entry.get('user_id'), id_to_create) + entries.append(entry) self.query_jarr('post', 'articles', entries) - up_feed = {'etag': self.headers.get('etag', ''), - 'last_modified': self.headers.get('last-modified', - strftime('%a, %d %b %Y %X %Z', gmtime()))} - if not is_parsing_ok(self.parsed_feed): - up_feed['last_error'] = str(self.parsed_feed.get('bozo_exception')) - up_feed['error_count'] = self.feed['error_count'] + 1 - return self.query_jarr('put', 'feed/%d' % self.feed['id'], up_feed) - - fresh_feed = construct_feed_from(url=self.feed['link'], - fp_parsed=self.parsed_feed, - feed=self.feed) - if fresh_feed.get('description'): - fresh_feed['description'] \ - = html.unescape(fresh_feed['description']) - - for key in 'description', 'site_link', 'icon_url': - if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key): - up_feed[key] = fresh_feed[key] + logger.info('%r %r - parsing failed, bumping error count', + self.feed['id'], self.feed['title']) + return self.query_jarr('put', 'feed/%d' % self.feed['id'], + {'last_error': str(self.parsed_feed.get('bozo_exception')), + 'error_count': self.feed['error_count'] + 1}) + + up_feed = construct_feed_from(url=self.feed['link'], + fp_parsed=self.parsed_feed, + feed=self.feed) + + up_feed['etag'] = self.headers.get('etag', '') + up_feed['last_modified'] = self.headers.get('last-modified', + strftime('%a, %d %b %Y %X %Z', gmtime())) + + up_feed.pop('link', None) # not updating feed url up_feed['user_id'] = self.feed['user_id'] - # re-getting that feed earlier since new entries appeared - if article_created: - up_feed['last_retrieved'] = datetime.utcnow() - else: + up_feed['last_error'] = None + up_feed['error_count'] = 0 + if not article_created: logger.info('%r %r - all article matched in db, adding nothing', self.feed['id'], self.feed['title']) - diff_keys = {key for key in up_feed - if up_feed[key] != self.feed.get(key)} - if not diff_keys: + # updating only changed attrs + up_feed = {key: value for key, value in up_feed.items() + if up_feed[key] != self.feed.get(key)} + if not up_feed: + logger.info('%r %r - nothing to update in feed attrs', + self.feed['id'], self.feed['title']) return # no change in the feed, no update - if not article_created and diff_keys == {'last_modified', 'etag'}: + if not article_created and set(up_feed) == {'last_modified', 'etag'}: + logger.info('%r %r - feed changed attrs are meaningless, ignoring', + self.feed['id'], self.feed['title']) return # meaningless if no new article has been published logger.info('%r %r - pushing feed attrs %r', self.feed['id'], self.feed['title'], {key: "%r -> %r" % (self.feed.get(key), up_feed[key]) - for key 
in up_feed if up_feed[key] != self.feed.get(key)}) + for key in up_feed}) self.query_jarr('put', 'feed/%d' % self.feed['id'], up_feed) @@ -173,7 +174,7 @@ def clean_feed(self): """Will reset the errors counters on a feed that have known errors""" if self.feed.get('error_count') or self.feed.get('last_error'): self.query_jarr('put', 'feed/%d' % self.feed['id'], - {'error_count': 0, 'last_error': ''}) + {'error_count': 0, 'last_error': None}) def callback(self, response): """will fetch the feed and interprete results (304, etag) or will @@ -183,9 +184,10 @@ def callback(self, response): response.raise_for_status() except Exception as error: error_count = self.feed['error_count'] + 1 - logger.warn('%r %r - an error occured while fetching ' - 'feed; bumping error count to %r', - self.feed['id'], self.feed['title'], error_count) + if self.feed['error_count'] > conf.FEED_ERROR_THRESHOLD: + logger.warn('%r %r - an error occured while fetching ' + 'feed; bumping error count to %r', + self.feed['id'], self.feed['title'], error_count) future = self.query_jarr('put', 'feed/%d' % self.feed['id'], {'error_count': error_count, 'last_error': str(error), @@ -221,7 +223,7 @@ def callback(self, response): self.feed['id'], self.feed['title']) ids, entries = [], {} - parsed_response = feedparser.parse(response.content) + parsed_response = feedparser.parse(response.content.strip()) skipped_list = [] for entry in parsed_response['entries']: if not entry: @@ -283,7 +285,7 @@ def callback(self, response): future = self.session.get(feed['link'], timeout=conf.CRAWLER_TIMEOUT, headers=self.prepare_headers(feed)) - self._futures.append(future) + FUTURES.append(future) feed_crwlr = FeedCrawler(feed, self.auth) future.add_done_callback(feed_crwlr.callback) diff --git a/src/lib/feed_utils.py b/src/lib/feed_utils.py index ff1885b23..005d3a443 100644 --- a/src/lib/feed_utils.py +++ b/src/lib/feed_utils.py @@ -2,6 +2,7 @@ import urllib import logging import feedparser +from copy import deepcopy from bootstrap import conf from bs4 import BeautifulSoup, SoupStrainer @@ -25,7 +26,7 @@ def _escape_title_and_desc(feed): def _browse_feedparser_feed(feed, check, default=None): - if feed.get('feed', {}).get('links') is None: + if not feed.get('feed', {}).get('links'): return default for link in feed['feed']['links']: if check(link): @@ -33,6 +34,16 @@ def _browse_feedparser_feed(feed, check, default=None): return default +def get_parsed_feed(url): + try: + fp_parsed = feedparser.parse(url, + request_headers={'User-Agent': conf.CRAWLER_USER_AGENT}) + except Exception as error: + logger.warn('failed to retreive that url: %r', error) + fp_parsed = {'bozo': 1, 'feed': {}, 'entries': []} + return fp_parsed + + def construct_feed_from(url=None, fp_parsed=None, feed=None): """ Will try to construct the most complete feed dict possible. 
@@ -41,45 +52,29 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None): fp_parsed: a feedparser object previously obtained feed: an existing feed dict, will be updated """ - feed = feed or {'link': url, 'site_link': url} - if not url and fp_parsed: + feed = deepcopy(feed) if feed else {} + if not url and hasattr(fp_parsed, 'get') and fp_parsed.get('href'): url = fp_parsed.get('href') # we'll try to obtain our first parsing from feedparser if url and not fp_parsed: - try: - fp_parsed = feedparser.parse(url, - request_headers={'User-Agent': conf.CRAWLER_USER_AGENT}) - except Exception as error: - logger.warn('failed to retreive that url: %r', error) - fp_parsed = {'bozo': 1, 'feed': {}, 'entries': []} + fp_parsed = get_parsed_feed(url) assert url is not None and fp_parsed is not None - if not is_parsing_ok(fp_parsed): - feed['link'] = None - - # updating link - feed['link'] = _browse_feedparser_feed(fp_parsed, - lambda link: link['type'] in FEED_MIMETYPES, - default=feed.get('link')) + if is_parsing_ok(fp_parsed): + feed['link'] = url + else: + # trying to get link url from data parsed by feedparser + feed['link'] = _browse_feedparser_feed(fp_parsed, + lambda link: link['type'] in FEED_MIMETYPES, + default=feed.get('link')) + if feed['link'] and feed['link'] != url: + # trying newly got url + fp_parsed = get_parsed_feed(feed['link']) + if not is_parsing_ok(fp_parsed): # didn't work, no link found + feed['link'] = None + feed['site_link'] = url - # parsing failed but we obtained a new link to try - if not is_parsing_ok(fp_parsed) and feed.get('link') != url: - try: - fp_parsed = feedparser.parse(feed['link'], - request_headers={'User-Agent': conf.CRAWLER_USER_AGENT}) - except Exception as error: - logger.warn('failed to retreive that url: %r', error) - fp_parsed = {'bozo': 1, 'feed': {}, 'entries': []} - url = feed['link'] - - if not is_parsing_ok(fp_parsed): - feed['link'] = None - - # extracting maximum values from parsed feed - feed['link'] = _browse_feedparser_feed(fp_parsed, - lambda link: link['type'] in FEED_MIMETYPES, - default=feed.get('link') or url) if fp_parsed['feed'].get('link'): feed['site_link'] = fp_parsed['feed']['link'] else: @@ -100,17 +95,9 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None): elif fp_parsed['feed'].get('subtitle_detail', {}).get('value'): feed['description'] = fp_parsed['feed']['subtitle_detail']['value'] - feed['icon_url'] = _browse_feedparser_feed(fp_parsed, - lambda link: 'icon' in link['rel'], default=feed.get('icon_url')) - if 'icon_url' not in feed: - del feed['icon_url'] - # trying to make up for missing values feed_split = urllib.parse.urlsplit(url) site_split = None - if not feed.get('site_link') and not feed.get('link'): - feed['site_link'] = url - if feed.get('site_link'): feed['site_link'] = rebuild_url(feed['site_link'], feed_split) site_split = urllib.parse.urlsplit(feed['site_link']) @@ -121,6 +108,24 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None): if feed['icon_url'] is None: del feed['icon_url'] + old_icon_url = feed.get('icon_url') + feed['icon_url'] = _browse_feedparser_feed(fp_parsed, + lambda link: 'icon' in link['rel']) + if feed['icon_url'] and feed['icon_url'] != old_icon_url: + feed['icon_url'] = try_get_icon_url(feed['icon_url'], + site_split, feed_split) + + if not feed['icon_url'] and fp_parsed.get('feed', {}).get('icon'): + feed['icon_url'] = try_get_icon_url(fp_parsed['feed']['icon'], + site_split, feed_split) + if not feed['icon_url'] and fp_parsed.get('feed', {}).get('logo'): + 
feed['icon_url'] = try_get_icon_url(fp_parsed['feed']['logo'], + site_split, feed_split) + if not feed['icon_url'] and old_icon_url: + feed['icon_url'] = old_icon_url + elif not feed['icon_url']: + del feed['icon_url'] + nothing_to_fill = all(bool(feed.get(key)) for key in ('link', 'title', 'icon_url')) # here we have all we want or we do not have the main url, diff --git a/src/lib/utils.py b/src/lib/utils.py index e6226eb32..16a1a3526 100644 --- a/src/lib/utils.py +++ b/src/lib/utils.py @@ -38,7 +38,7 @@ def rebuild_url(url, base_split): new_split = urllib.parse.SplitResult( scheme=split.scheme or base_split.scheme, netloc=split.netloc or base_split.netloc, - path=split.path, query='', fragment='') + path=split.path, query=split.query, fragment=split.fragment) return urllib.parse.urlunsplit(new_split) diff --git a/src/tests/crawler_test.py b/src/tests/crawler_test.py index e487d44de..5cb713dc8 100644 --- a/src/tests/crawler_test.py +++ b/src/tests/crawler_test.py @@ -13,7 +13,7 @@ class CrawlerTest(JarrFlaskCommon): def setUp(self): super().setUp() - self.wait_params = {'wait_for': 5, 'max_wait': 30} + self.wait_params = {'wait_for': 1, 'max_wait': 10, 'checks': 1} UserController().update({'login': 'admin'}, {'is_api': True}) self._is_secure_served \ = patch('web.lib.article_cleaner.is_secure_served') diff --git a/src/tests/libs/article_utils_test.py b/src/tests/libs/article_utils_test.py index f914f02c6..60ce3b8d1 100644 --- a/src/tests/libs/article_utils_test.py +++ b/src/tests/libs/article_utils_test.py @@ -1,11 +1,11 @@ -from tests.base import JarrFlaskCommon +import unittest import json from mock import patch from requests.exceptions import MissingSchema from lib.article_utils import construct_article -class ConstructArticleTest(JarrFlaskCommon): +class ConstructArticleTest(unittest.TestCase): response_url = '//www.pariszigzag.fr/paris-insolite-secret/'\ 'les-plus-belles-boulangeries-de-paris' @@ -46,14 +46,15 @@ def test_missing_title(self): self.assertEquals(1, article['feed_id']) def test_missing_scheme(self): - response = self.get_response('https:') - self.jarr_get_patch.side_effect = [MissingSchema, response] + response = self.get_response('http:') + self.jarr_get_patch.side_effect = [ + MissingSchema, MissingSchema, response] entry = self.get_entry() entry['link'] = entry['link'][5:] article = construct_article(entry, {'id': 1, 'user_id': 1}) - self.assertEquals(2, self.jarr_get_patch.call_count) + self.assertEquals(3, self.jarr_get_patch.call_count) self.assertEquals(response.url, self.jarr_get_patch.call_args[0][0]) self.assertEquals('http://www.pariszigzag.fr/?p=56413', article['entry_id']) diff --git a/src/tests/libs/feed_utils_test.py b/src/tests/libs/feed_utils_test.py index 4a42bdf42..5ddc769ab 100644 --- a/src/tests/libs/feed_utils_test.py +++ b/src/tests/libs/feed_utils_test.py @@ -1,8 +1,8 @@ -from tests.base import JarrFlaskCommon +import unittest from lib.feed_utils import construct_feed_from -class ConstructFeedFromTest(JarrFlaskCommon): +class ConstructFeedFromTest(unittest.TestCase): @property def jdh_feed(self): diff --git a/src/web/controllers/cluster.py b/src/web/controllers/cluster.py index 946503337..5ad934225 100644 --- a/src/web/controllers/cluster.py +++ b/src/web/controllers/cluster.py @@ -1,4 +1,5 @@ import logging +from datetime import timedelta from bootstrap import db, SQLITE_ENGINE from sqlalchemy import func, Integer, and_ @@ -18,12 +19,16 @@ class ClusterController(AbstractController): def _get_cluster_by_link(self, article): return 
self.read(user_id=article.user_id, + main_date__lt=article.date + timedelta(days=7), + main_date__gt=article.date - timedelta(days=7), main_link=article.link).first() def _get_cluster_by_title(self, article): if article.category and article.category.cluster_on_title: try: article = ArticleController(self.user_id).get( + date__lt=article.date + timedelta(days=7), + date__gt=article.date - timedelta(days=7), user_id=article.user_id, category_id=article.category_id, title__ilike=article.title) diff --git a/src/web/controllers/feed.py b/src/web/controllers/feed.py index af90df4b1..cc00e510c 100644 --- a/src/web/controllers/feed.py +++ b/src/web/controllers/feed.py @@ -92,7 +92,7 @@ def __clean_feed_fields(self, attrs): if attrs.get('category_id') == 0: attrs['category_id'] = None if 'filters' in attrs: - attrs['filters'] = [filter_ for filter_ in attrs['filters'] + attrs['filters'] = [filter_ for filter_ in (attrs['filters'] or []) if type(filter_) is dict] def create(self, **attrs): diff --git a/src/web/views/feed.py b/src/web/views/feed.py index ce970126d..b40eb9dcc 100644 --- a/src/web/views/feed.py +++ b/src/web/views/feed.py @@ -57,7 +57,7 @@ def check_feeds(link, site_link): feed = construct_feed_from(url) - existing_feed = check_feeds(feed.get('link'), feed.get('site_link')) + existing_feed = check_feeds(feed.get('link'), feed.get('link')) if existing_feed: return redirect(url_for('home', at='f', ai=existing_feed.id)) From 64b8c5d0a882c07d2a4f9b13119511f6ed121aee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?= Date: Fri, 21 Oct 2016 13:25:50 +0200 Subject: [PATCH 032/164] sorting imports --- src/bootstrap.py | 5 +++-- src/crawler/http_crawler.py | 16 +++++++++------- src/lib/article_utils.py | 9 +++++---- src/lib/conf_handling.py | 6 +++--- src/lib/feed_utils.py | 9 +++++---- src/lib/utils.py | 8 +++++--- src/manager.py | 7 ++++--- src/migrations/env.py | 14 ++++++++------ .../versions/122ac0c356c_new_users.py | 7 ++++--- ...fe_changed_the_type_of_the_column_last_.py | 6 ++++-- .../versions/19bdaa6208e_add_icon_column.py | 2 +- ...389c22_remove_email_notification_column.py | 5 +++-- ...a207_mv_icons_from_feed_tbl_to_icon_tbl.py | 2 +- .../versions/3f83bfe93fc_adding_category.py | 5 +++-- .../422da2d0234_adding_filters_field.py | 2 +- .../48f561c0ce6_add_column_entry_id.py | 5 +++-- .../493abdb2b73_readability_integration.py | 5 +++-- ...835c03754c69_enhancing_comparison_style.py | 3 ++- .../versions/9462d9753423_tag_handling.py | 2 +- .../versions/9e3fecc9d031_oauth_linuxfr.py | 5 +++-- .../versions/a7f62d50d366_clustering.py | 6 ++++-- ...ea_adding_feed_and_user_attributes_for_.py | 5 +++-- src/plugins/readability.py | 2 +- src/runserver.py | 4 +++- src/scripts/probes.py | 4 +++- src/tests/api/article_test.py | 2 +- src/tests/api/category_test.py | 2 +- src/tests/api/cluster_test.py | 2 +- src/tests/api/feed_test.py | 2 +- src/tests/base.py | 8 +++++--- src/tests/controllers/article_test.py | 2 +- src/tests/controllers/category_test.py | 4 ++-- src/tests/controllers/cluster_test.py | 7 ++++--- src/tests/controllers/feed_test.py | 4 ++-- src/tests/crawler_test.py | 8 +++++--- src/tests/fixtures/filler.py | 5 +++-- src/tests/libs/article_utils_test.py | 4 +++- src/tests/libs/feed_utils_test.py | 1 + src/tests/views/admin_test.py | 4 ++-- src/tests/views/home_test.py | 4 +++- src/tests/views/user_test.py | 7 ++++--- src/web/controllers/abstract.py | 8 +++++--- src/web/controllers/article.py | 12 +++++++----- src/web/controllers/category.py | 3 ++- 
From 64b8c5d0a882c07d2a4f9b13119511f6ed121aee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Schmidts?=
Date: Fri, 21 Oct 2016 13:25:50 +0200
Subject: [PATCH 032/164] sorting imports

---
 src/bootstrap.py | 5 +++--
 src/crawler/http_crawler.py | 16 +++++++++-------
 src/lib/article_utils.py | 9 +++++----
 src/lib/conf_handling.py | 6 +++---
 src/lib/feed_utils.py | 9 +++++----
 src/lib/utils.py | 8 +++++---
 src/manager.py | 7 ++++---
 src/migrations/env.py | 14 ++++++++------
 .../versions/122ac0c356c_new_users.py | 7 ++++---
 ...fe_changed_the_type_of_the_column_last_.py | 6 ++++--
 .../versions/19bdaa6208e_add_icon_column.py | 2 +-
 ...389c22_remove_email_notification_column.py | 5 +++--
 ...a207_mv_icons_from_feed_tbl_to_icon_tbl.py | 2 +-
 .../versions/3f83bfe93fc_adding_category.py | 5 +++--
 .../422da2d0234_adding_filters_field.py | 2 +-
 .../48f561c0ce6_add_column_entry_id.py | 5 +++--
 .../493abdb2b73_readability_integration.py | 5 +++--
 ...835c03754c69_enhancing_comparison_style.py | 3 ++-
 .../versions/9462d9753423_tag_handling.py | 2 +-
 .../versions/9e3fecc9d031_oauth_linuxfr.py | 5 +++--
 .../versions/a7f62d50d366_clustering.py | 6 ++++--
 ...ea_adding_feed_and_user_attributes_for_.py | 5 +++--
 src/plugins/readability.py | 2 +-
 src/runserver.py | 4 +++-
 src/scripts/probes.py | 4 +++-
 src/tests/api/article_test.py | 2 +-
 src/tests/api/category_test.py | 2 +-
 src/tests/api/cluster_test.py | 2 +-
 src/tests/api/feed_test.py | 2 +-
 src/tests/base.py | 8 +++++---
 src/tests/controllers/article_test.py | 2 +-
 src/tests/controllers/category_test.py | 4 ++--
 src/tests/controllers/cluster_test.py | 7 ++++---
 src/tests/controllers/feed_test.py | 4 ++--
 src/tests/crawler_test.py | 8 +++++---
 src/tests/fixtures/filler.py | 5 +++--
 src/tests/libs/article_utils_test.py | 4 +++-
 src/tests/libs/feed_utils_test.py | 1 +
 src/tests/views/admin_test.py | 4 ++--
 src/tests/views/home_test.py | 4 +++-
 src/tests/views/user_test.py | 7 ++++---
 src/web/controllers/abstract.py | 8 +++++---
 src/web/controllers/article.py | 12 +++++++-----
 src/web/controllers/category.py | 3 ++-
 src/web/controllers/cluster.py | 14 ++++++++------
 src/web/controllers/feed.py | 9 +++++----
 src/web/controllers/icon.py | 2 ++
 src/web/controllers/user.py | 7 +++++--
 src/web/forms.py | 12 +++++++-----
 src/web/lib/article_cleaner.py | 4 +++-
 src/web/lib/view_utils.py | 12 +++++++-----
 src/web/models/article.py | 8 +++++---
 src/web/models/category.py | 4 ++--
 src/web/models/cluster.py | 6 ++++--
 src/web/models/feed.py | 6 ++++--
 src/web/models/icon.py | 3 ++-
 src/web/models/tag.py | 1 +
 src/web/models/user.py | 5 +++--
 src/web/utils.py | 2 +-
 src/web/views/admin.py | 12 ++++++------
 src/web/views/api/article.py | 11 ++++++-----
 src/web/views/api/category.py | 7 +++----
 src/web/views/api/cluster.py | 4 ++--
 src/web/views/api/common.py | 9 +++++----
 src/web/views/api/feed.py | 11 ++++-------
 src/web/views/article.py | 2 +-
 src/web/views/cluster.py | 3 ++-
 src/web/views/common.py | 8 +++++---
 src/web/views/feed.py | 11 +++++------
 src/web/views/home.py | 19 ++++++++-----------
 src/web/views/icon.py | 2 ++
 src/web/views/session_mgmt.py | 17 ++++++++---------
 src/web/views/user.py | 17 ++++++++---------
 src/web/views/views.py | 5 +++--
 74 files changed, 265 insertions(+), 201 deletions(-)

diff --git a/src/bootstrap.py b/src/bootstrap.py
index 1765ca7e0..83608396b 100644
--- a/src/bootstrap.py
+++ b/src/bootstrap.py
@@ -3,13 +3,14 @@
 # required imports and code exection for basic functionning
-import os
 import logging
+import os
 from urllib.parse import urlparse
+
 from flask import Flask
 from flask_sqlalchemy import SQLAlchemy
-from lib.conf_handling import ConfObject

+from lib.conf_handling import ConfObject

 conf = ConfObject()
 # handling on the fly migration to new conf style

diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py
index daa1318a7..de7dc9c29 100644
--- a/src/crawler/http_crawler.py
+++ b/src/crawler/http_crawler.py
@@ -12,18 +12,20 @@
 to create the missing entries
 """

-import time
 import json
 import logging
-import feedparser
-from bootstrap import conf
-from time import strftime, gmtime
+import time
+from concurrent.futures import ThreadPoolExecutor, wait
 from datetime import datetime, timedelta
-from concurrent.futures import wait, ThreadPoolExecutor
+from time import gmtime, strftime
+
+import feedparser
 from requests_futures.sessions import FuturesSession
-from lib.utils import default_handler, to_hash
-from lib.feed_utils import construct_feed_from, is_parsing_ok
+
+from bootstrap import conf
 from lib.article_utils import construct_article, get_skip_and_ids
+from lib.feed_utils import construct_feed_from, is_parsing_ok
+from lib.utils import default_handler, to_hash

 logger = logging.getLogger(__name__)
 logging.captureWarnings(True)

diff --git a/src/lib/article_utils.py b/src/lib/article_utils.py
index c719217c1..4c7742d48 100644
--- a/src/lib/article_utils.py
+++ b/src/lib/article_utils.py
@@ -1,12 +1,13 @@
-import re
 import html
 import logging
+import re
+from datetime import datetime, timezone
 from enum import Enum
+from urllib.parse import SplitResult, urlsplit, urlunsplit
+
 import dateutil.parser
-from urllib.parse import urlsplit, urlunsplit, SplitResult
-from requests.exceptions import MissingSchema
-from datetime import datetime, timezone
 from bs4 import BeautifulSoup, SoupStrainer
+from requests.exceptions import MissingSchema

 from bootstrap import conf
 from lib.utils import jarr_get

diff --git a/src/lib/conf_handling.py b/src/lib/conf_handling.py
index 13f594781..4f864a8a9 100644
--- a/src/lib/conf_handling.py
+++ b/src/lib/conf_handling.py
@@ -1,8 +1,8 @@
-import os
 import json
-import random
 import logging
-from os.path import abspath, join, dirname
+import os
+import random
+from os.path import abspath, dirname, join

 logger = logging.getLogger(__name__)
 ROOT = abspath(join(dirname(globals()['__file__']), '../../..'))

diff --git a/src/lib/feed_utils.py b/src/lib/feed_utils.py
index 005d3a443..7f7ac57e1 100644
--- a/src/lib/feed_utils.py
+++ b/src/lib/feed_utils.py
@@ -1,12 +1,13 @@
 import html
-import urllib
 import logging
-import feedparser
+import urllib
 from copy import deepcopy
-from bootstrap import conf
+
+import feedparser
 from bs4 import BeautifulSoup, SoupStrainer
-from lib.utils import try_get_icon_url, rebuild_url, jarr_get
+
+from bootstrap import conf
+from lib.utils import jarr_get, rebuild_url, try_get_icon_url

 logger = logging.getLogger(__name__)
 logging.captureWarnings(True)

diff --git a/src/lib/utils.py b/src/lib/utils.py
index 16a1a3526..71faaa2b9 100644
--- a/src/lib/utils.py
+++ b/src/lib/utils.py
@@ -1,12 +1,14 @@
+import logging
 import re
 import types
 import urllib
-import logging
-import requests
 from hashlib import md5
-from bootstrap import conf
+
+import requests
 from flask import request, url_for

+from bootstrap import conf
+
 logger = logging.getLogger(__name__)

diff --git a/src/manager.py b/src/manager.py
index 1f148f650..8dd0d3b58 100755
--- a/src/manager.py
+++ b/src/manager.py
@@ -2,13 +2,14 @@
 # -*- coding: utf-8 -*-
 import logging
 from datetime import datetime, timedelta
-from flask_script import Manager
+
 from flask_migrate import Migrate, MigrateCommand
+from flask_script import Manager

-from bootstrap import application, db, conf
 import web.models
-from web.controllers import FeedController, UserController
+from bootstrap import application, conf, db
 from scripts.probes import ArticleProbe, FeedProbe
+from web.controllers import FeedController, UserController

 logger = logging.getLogger(__name__)
 Migrate(application, db)

diff --git a/src/migrations/env.py b/src/migrations/env.py
index 597778f9a..347667300 100644
--- a/src/migrations/env.py
+++ b/src/migrations/env.py
@@ -1,8 +1,15 @@
 import logging
-from bootstrap import set_logging
+
 from alembic import context
+# add your model's MetaData object here
+# for 'autogenerate' support
+# from myapp import mymodel
+# target_metadata = mymodel.Base.metadata
+from flask import current_app
 from sqlalchemy import engine_from_config, pool

+from bootstrap import set_logging
+
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
 config = context.config
@@ -14,11 +21,6 @@
                 log_level=logging.WARNING)
 set_logging(modules=('alembic'), log_format=log_format)

-# add your model's MetaData object here
-# for 'autogenerate' support
-# from myapp import mymodel
-# target_metadata = mymodel.Base.metadata
-from flask import current_app
 config.set_main_option('sqlalchemy.url',
                        current_app.config.get('SQLALCHEMY_DATABASE_URI'))
 target_metadata = current_app.extensions['migrate'].db.metadata

diff --git a/src/migrations/versions/122ac0c356c_new_users.py b/src/migrations/versions/122ac0c356c_new_users.py
index 6237615d2..ffa7a57a2 100644
--- a/src/migrations/versions/122ac0c356c_new_users.py
+++ b/src/migrations/versions/122ac0c356c_new_users.py
@@ -12,10 +12,11 @@
 branch_labels = None
 depends_on = None

-from bootstrap import conf
-from alembic import op
 import sqlalchemy as sa
-from sqlalchemy.sql import table, column
+from alembic import op
+from sqlalchemy.sql import column, table
+
+from bootstrap import conf


 def get_tables():

diff --git a/src/migrations/versions/17dcb75f3fe_changed_the_type_of_the_column_last_.py b/src/migrations/versions/17dcb75f3fe_changed_the_type_of_the_column_last_.py
index 2f5a756a5..309988240 100644
--- a/src/migrations/versions/17dcb75f3fe_changed_the_type_of_the_column_last_.py
+++ b/src/migrations/versions/17dcb75f3fe_changed_the_type_of_the_column_last_.py
@@ -11,9 +11,11 @@
 down_revision = 'cde34831ea'

 from datetime import datetime
-from bootstrap import conf
-from alembic import op
+
 import sqlalchemy as sa
+from alembic import op
+
+from bootstrap import conf


 def upgrade():

diff --git a/src/migrations/versions/19bdaa6208e_add_icon_column.py b/src/migrations/versions/19bdaa6208e_add_icon_column.py
index 2efa376f9..35149d530 100644
--- a/src/migrations/versions/19bdaa6208e_add_icon_column.py
+++ b/src/migrations/versions/19bdaa6208e_add_icon_column.py
@@ -10,8 +10,8 @@
 revision = '19bdaa6208e'
 down_revision = '422da2d0234'

-from alembic import op
 import sqlalchemy as sa
+from alembic import op


 def upgrade():

diff --git a/src/migrations/versions/1b750a389c22_remove_email_notification_column.py b/src/migrations/versions/1b750a389c22_remove_email_notification_column.py
index bcb6e084c..240525231 100644
--- a/src/migrations/versions/1b750a389c22_remove_email_notification_column.py
+++ b/src/migrations/versions/1b750a389c22_remove_email_notification_column.py
@@ -10,9 +10,10 @@
 revision = '1b750a389c22'
 down_revision = '48f561c0ce6'

-from bootstrap import conf
-from alembic import op
 import sqlalchemy as sa
+from alembic import op
+
+from bootstrap import conf


 def upgrade():

diff --git a/src/migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py b/src/migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py
index a1b7c157d..b12ecfbb0 100644
--- a/src/migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py
+++ b/src/migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py
@@ -10,8 +10,8 @@
 revision = '25ca960a207'
 down_revision = '19bdaa6208e'

-from alembic import op
 import sqlalchemy as sa
+from alembic import op

 from bootstrap import conf

diff --git a/src/migrations/versions/3f83bfe93fc_adding_category.py b/src/migrations/versions/3f83bfe93fc_adding_category.py
index 126acd4dd..3fecf9afa 100644
--- a/src/migrations/versions/3f83bfe93fc_adding_category.py
+++ b/src/migrations/versions/3f83bfe93fc_adding_category.py
@@ -9,9 +9,10 @@
 revision = '3f83bfe93fc'
 down_revision = '25ca960a207'

-from bootstrap import conf
-from alembic import op
 import sqlalchemy as sa
+from alembic import op
+
+from bootstrap import conf


 def upgrade():

diff --git a/src/migrations/versions/422da2d0234_adding_filters_field.py b/src/migrations/versions/422da2d0234_adding_filters_field.py
index bcbdf042d..b40d8cd87 100644
--- a/src/migrations/versions/422da2d0234_adding_filters_field.py
+++ b/src/migrations/versions/422da2d0234_adding_filters_field.py
@@ -10,8 +10,8 @@
 revision = '422da2d0234'
 down_revision = '17dcb75f3fe'

-from alembic import op
 import sqlalchemy as sa
+from alembic import op


 def upgrade():

diff --git a/src/migrations/versions/48f561c0ce6_add_column_entry_id.py b/src/migrations/versions/48f561c0ce6_add_column_entry_id.py
index 6ca8bca94..ca7de975c 100644
--- a/src/migrations/versions/48f561c0ce6_add_column_entry_id.py
+++ b/src/migrations/versions/48f561c0ce6_add_column_entry_id.py
@@ -12,9 +12,10 @@
 branch_labels = None
 depends_on = None

-from bootstrap import conf
-from alembic import op
 import sqlalchemy as sa
+from alembic import op
+
+from bootstrap import conf


 def upgrade():

diff --git a/src/migrations/versions/493abdb2b73_readability_integration.py b/src/migrations/versions/493abdb2b73_readability_integration.py
index 3665dc600..eadc5aa0d 100644
--- a/src/migrations/versions/493abdb2b73_readability_integration.py
+++ b/src/migrations/versions/493abdb2b73_readability_integration.py
@@ -9,9 +9,10 @@
 # revision identifiers, used by Alembic.
 revision = '493abdb2b73'
 down_revision = '3f83bfe93fc'
-from bootstrap import conf
-from alembic import op
 import sqlalchemy as sa
+from alembic import op
+
+from bootstrap import conf


 def upgrade():

diff --git a/src/migrations/versions/835c03754c69_enhancing_comparison_style.py b/src/migrations/versions/835c03754c69_enhancing_comparison_style.py
index 9924401b7..87fdf06a4 100644
--- a/src/migrations/versions/835c03754c69_enhancing_comparison_style.py
+++ b/src/migrations/versions/835c03754c69_enhancing_comparison_style.py
@@ -12,8 +12,9 @@
 branch_labels = None
 depends_on = None

-from alembic import op
 import sqlalchemy as sa
+from alembic import op
+
 from web.models.article import Article

diff --git a/src/migrations/versions/9462d9753423_tag_handling.py b/src/migrations/versions/9462d9753423_tag_handling.py
index 8f5434c5f..18735bddc 100644
--- a/src/migrations/versions/9462d9753423_tag_handling.py
+++ b/src/migrations/versions/9462d9753423_tag_handling.py
@@ -12,8 +12,8 @@
 branch_labels = None
 depends_on = None

-from alembic import op
 import sqlalchemy as sa
+from alembic import op


 def upgrade():

diff --git a/src/migrations/versions/9e3fecc9d031_oauth_linuxfr.py b/src/migrations/versions/9e3fecc9d031_oauth_linuxfr.py
index 178d7de68..5c0f442c6 100644
--- a/src/migrations/versions/9e3fecc9d031_oauth_linuxfr.py
+++ b/src/migrations/versions/9e3fecc9d031_oauth_linuxfr.py
@@ -12,9 +12,10 @@
 branch_labels = None
 depends_on = None

-from bootstrap import conf
-from alembic import op
 import sqlalchemy as sa
+from alembic import op
+
+from bootstrap import conf


 def upgrade():

diff --git a/src/migrations/versions/a7f62d50d366_clustering.py b/src/migrations/versions/a7f62d50d366_clustering.py
index 567149dab..df03f1f0a 100644
--- a/src/migrations/versions/a7f62d50d366_clustering.py
+++ b/src/migrations/versions/a7f62d50d366_clustering.py
@@ -13,9 +13,11 @@
 depends_on = None

 from datetime import datetime
-from bootstrap import SQLITE_ENGINE
-from alembic import op
+
 import sqlalchemy as sa
+from alembic import op
+
+from bootstrap import SQLITE_ENGINE


 def upgrade():

diff --git a/src/migrations/versions/cde34831ea_adding_feed_and_user_attributes_for_.py b/src/migrations/versions/cde34831ea_adding_feed_and_user_attributes_for_.py
index 63c93bf40..a76f58669 100644
--- a/src/migrations/versions/cde34831ea_adding_feed_and_user_attributes_for_.py
+++ b/src/migrations/versions/cde34831ea_adding_feed_and_user_attributes_for_.py
@@ -9,11 +9,12 @@
 # revision identifiers, used by Alembic.
 revision = 'cde34831ea'
 down_revision = '1b750a389c22'
-from bootstrap import conf

 from datetime import datetime
-from alembic import op
+
 import sqlalchemy as sa
+from alembic import op
+
+from bootstrap import conf


 def upgrade():

diff --git a/src/plugins/readability.py b/src/plugins/readability.py
index 9bcfc9767..0c1d50b4a 100644
--- a/src/plugins/readability.py
+++ b/src/plugins/readability.py
@@ -1,7 +1,7 @@
 import logging
-import requests
 from urllib.parse import urlencode

+import requests

 logger = logging.getLogger(__name__)
 READABILITY_PARSER = 'https://www.readability.com/api/content/v1/parser?'

diff --git a/src/runserver.py b/src/runserver.py
index 5734d9826..31c5a1fe0 100755
--- a/src/runserver.py
+++ b/src/runserver.py
@@ -1,9 +1,11 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 import calendar
+
 from flask import request
 from flask_babel import Babel
-from bootstrap import conf, application
+
+from bootstrap import application, conf

 if conf.ON_HEROKU:
     from flask_sslify import SSLify

diff --git a/src/scripts/probes.py b/src/scripts/probes.py
index e4b6b93a6..d9c4f815a 100644
--- a/src/scripts/probes.py
+++ b/src/scripts/probes.py
@@ -1,10 +1,12 @@
 #!/usr/bin/python3
 import sys
 from datetime import datetime, timedelta
+
 from flask_script import Command, Option

-from web.controllers import FeedController, ArticleController
+from web.controllers import ArticleController, FeedController
 from web.models import User

+
 DEFAULT_HEADERS = {'Content-Type': 'application/json', 'User-Agent': 'munin'}
 LATE_AFTER = 60
 FETCH_RATE = 3

diff --git a/src/tests/api/article_test.py b/src/tests/api/article_test.py
index e214fe125..0fef3560a 100644
--- a/src/tests/api/article_test.py
+++ b/src/tests/api/article_test.py
@@ -1,5 +1,5 @@
-from tests.base import JarrFlaskCommon
 from tests.api.common import ApiCommon
+from tests.base import JarrFlaskCommon
 from web.controllers import UserController

diff --git a/src/tests/api/category_test.py b/src/tests/api/category_test.py
index 0f7967c1c..31063596d 100644
--- a/src/tests/api/category_test.py
+++ b/src/tests/api/category_test.py
@@ -1,5 +1,5 @@
-from tests.base import JarrFlaskCommon
 from tests.api.common import ApiCommon
+from tests.base import JarrFlaskCommon


 class CategoryApiTest(JarrFlaskCommon, ApiCommon):

diff --git a/src/tests/api/cluster_test.py b/src/tests/api/cluster_test.py
index 084a66473..583af2353 100644
--- a/src/tests/api/cluster_test.py
+++ b/src/tests/api/cluster_test.py
@@ -1,5 +1,5 @@
-from tests.base import JarrFlaskCommon
 from tests.api.common import ApiCommon
+from tests.base import JarrFlaskCommon


 class ClusterApiTest(JarrFlaskCommon, ApiCommon):

diff --git a/src/tests/api/feed_test.py b/src/tests/api/feed_test.py
index c6d403275..c6837fdd2 100644
--- a/src/tests/api/feed_test.py
+++ b/src/tests/api/feed_test.py
@@ -1,5 +1,5 @@
-from tests.base import JarrFlaskCommon
 from tests.api.common import ApiCommon
+from tests.base import JarrFlaskCommon
 from web.controllers import UserController

diff --git a/src/tests/base.py b/src/tests/base.py
index 65b87e444..051654dfb 100644
--- a/src/tests/base.py
+++ b/src/tests/base.py
@@ -4,14 +4,16 @@
 import json
 import logging
 import unittest
-from os import path
 from base64 import b64encode
-from runserver import application
-from tests.fixtures.filler import populate_db, reset_db
+from os import path
+
 from flask_login import login_user, logout_user
 from werkzeug.exceptions import NotFound

 from bootstrap import conf
+from runserver import application
+from tests.fixtures.filler import populate_db, reset_db
+
 logger = logging.getLogger('web')

diff --git a/src/tests/controllers/article_test.py b/src/tests/controllers/article_test.py
index 8a344d005..b5de78711 100644
--- a/src/tests/controllers/article_test.py
+++ b/src/tests/controllers/article_test.py
@@ -1,5 +1,5 @@
 from tests.base import BaseJarrTest
-from web.controllers import UserController, ArticleController, FeedController
+from web.controllers import ArticleController, FeedController, UserController


 class ArticleControllerTest(BaseJarrTest):

diff --git a/src/tests/controllers/category_test.py b/src/tests/controllers/category_test.py
index 3bf309669..1062aff54 100644
--- a/src/tests/controllers/category_test.py
+++ b/src/tests/controllers/category_test.py
@@ -1,6 +1,6 @@
 from tests.base import BaseJarrTest
-from web.controllers import (UserController, CategoryController,
-                             FeedController, ArticleController)
+from web.controllers import (ArticleController, CategoryController,
+                             FeedController, UserController)


 class CategoryControllerTest(BaseJarrTest):

diff --git a/src/tests/controllers/cluster_test.py b/src/tests/controllers/cluster_test.py
index 1cd1e05f9..fe18bdd69 100644
--- a/src/tests/controllers/cluster_test.py
+++ b/src/tests/controllers/cluster_test.py
@@ -1,8 +1,9 @@
-from random import randint
 from datetime import timedelta
+from random import randint
+
 from tests.base import BaseJarrTest
-from web.controllers import (ArticleController, ClusterController,
-                             FeedController, CategoryController)
+from web.controllers import (ArticleController, CategoryController,
+                             ClusterController, FeedController)


 class ClusterControllerTest(BaseJarrTest):

diff --git a/src/tests/controllers/feed_test.py b/src/tests/controllers/feed_test.py
index 9561296ec..2be589af6 100644
--- a/src/tests/controllers/feed_test.py
+++ b/src/tests/controllers/feed_test.py
@@ -1,6 +1,6 @@
 from tests.base import BaseJarrTest
-from web.controllers import (UserController, FeedController,
-                             ArticleController, ClusterController)
+from web.controllers import (ArticleController, ClusterController,
+                             FeedController, UserController)


 class FeedControllerTest(BaseJarrTest):

diff --git a/src/tests/crawler_test.py b/src/tests/crawler_test.py
index 5cb713dc8..8b92e2bb5 100644
--- a/src/tests/crawler_test.py
+++ b/src/tests/crawler_test.py
@@ -1,11 +1,13 @@
-from tests.base import JarrFlaskCommon
 import logging
-from mock import Mock, patch
 from datetime import datetime

+from mock import Mock, patch
+
 from bootstrap import conf
 from crawler.http_crawler import CrawlerScheduler
-from web.controllers import UserController, FeedController
+from tests.base import JarrFlaskCommon
+from web.controllers import FeedController, UserController
+

 logger = logging.getLogger('web')

diff --git a/src/tests/fixtures/filler.py b/src/tests/fixtures/filler.py
index e384da592..7b992f5cf 100644
--- a/src/tests/fixtures/filler.py
+++ b/src/tests/fixtures/filler.py
@@ -1,7 +1,8 @@
 from datetime import datetime, timedelta
+
 from manager import db_create, db_empty
-from web.controllers import UserController, CategoryController, \
-    FeedController, ArticleController
+from web.controllers import (ArticleController, CategoryController,
+                             FeedController, UserController)


 def populate_db():

diff --git a/src/tests/libs/article_utils_test.py b/src/tests/libs/article_utils_test.py
index 60ce3b8d1..d99505b02 100644
--- a/src/tests/libs/article_utils_test.py
+++ b/src/tests/libs/article_utils_test.py
@@ -1,7 +1,9 @@
-import unittest
 import json
+import unittest
+
 from mock import patch
 from requests.exceptions import MissingSchema
+
 from lib.article_utils import construct_article

diff --git a/src/tests/libs/feed_utils_test.py b/src/tests/libs/feed_utils_test.py
index 5ddc769ab..a06b9ab07 100644
--- a/src/tests/libs/feed_utils_test.py
+++ b/src/tests/libs/feed_utils_test.py
@@ -1,4 +1,5 @@
 import unittest
+
 from lib.feed_utils import construct_feed_from

diff --git a/src/tests/views/admin_test.py b/src/tests/views/admin_test.py
index 06823cfc4..dc5993b33 100644
--- a/src/tests/views/admin_test.py
+++ b/src/tests/views/admin_test.py
@@ -1,6 +1,7 @@
+from flask_principal import PermissionDenied
+
 from tests.base import JarrFlaskCommon
 from web.controllers import UserController
-from flask_principal import PermissionDenied


 class BaseUiTest(JarrFlaskCommon):
@@ -79,4 +80,3 @@ def test_toggle_user(self):
         self.assertEquals(302, resp.status_code)
         self.assertFalse(self.uctrl.get(id=self.user.id).is_active)
         self.logout()
-

diff --git a/src/tests/views/home_test.py b/src/tests/views/home_test.py
index e21320cd2..9ae9e1687 100644
--- a/src/tests/views/home_test.py
+++ b/src/tests/views/home_test.py
@@ -1,7 +1,9 @@
 import json
+
 from mock import patch
+
 from tests.base import JarrFlaskCommon
-from web.controllers import UserController, FeedController
+from web.controllers import FeedController, UserController


 class BaseUiTest(JarrFlaskCommon):

diff --git a/src/tests/views/user_test.py b/src/tests/views/user_test.py
index ee44f64ca..3aec045a6 100644
--- a/src/tests/views/user_test.py
+++ b/src/tests/views/user_test.py
@@ -1,9 +1,10 @@
 from io import BytesIO
+
 from mock import patch
+
 from tests.base import JarrFlaskCommon
-from web.controllers import (UserController, CategoryController,
-                             FeedController,
-                             ClusterController, ArticleController)
+from web.controllers import (ArticleController, CategoryController,
+                             ClusterController, FeedController, UserController)


 class BaseUiTest(JarrFlaskCommon):

diff --git a/src/web/controllers/abstract.py b/src/web/controllers/abstract.py
index 7d2bcbdc5..44c04e6cc 100644
--- a/src/web/controllers/abstract.py
+++ b/src/web/controllers/abstract.py
@@ -1,12 +1,14 @@
 import logging
-import dateutil.parser
-from bootstrap import db
-from datetime import datetime
 from collections import defaultdict
+from datetime import datetime
+
+import dateutil.parser
 from sqlalchemy import and_, or_
 from sqlalchemy.ext.associationproxy import AssociationProxy
 from werkzeug.exceptions import Forbidden, NotFound

+from bootstrap import db
+
 logger = logging.getLogger(__name__)

diff --git a/src/web/controllers/article.py b/src/web/controllers/article.py
index c22116efe..941732ca2 100644
--- a/src/web/controllers/article.py
+++ b/src/web/controllers/article.py
@@ -1,14 +1,16 @@
 import logging
-import sqlalchemy
-from sqlalchemy import func
 from collections import Counter
 from datetime import datetime, timedelta

+import sqlalchemy
+from sqlalchemy import func
+
 from bootstrap import db
-from .abstract import AbstractController
-from web.controllers import CategoryController, FeedController
-from web.models import User, Article
 from lib.article_utils import process_filters
+from web.controllers import CategoryController, FeedController
+from web.models import Article, User
+
+from .abstract import AbstractController

 logger = logging.getLogger(__name__)

diff --git a/src/web/controllers/category.py b/src/web/controllers/category.py
index 1196e6cba..27cbbe812 100644
--- a/src/web/controllers/category.py
+++ b/src/web/controllers/category.py
@@ -1,6 +1,7 @@
-from .abstract import AbstractController
 from web.models import Category

+from .abstract import AbstractController
+

 class CategoryController(AbstractController):
     _db_cls = Category

diff --git a/src/web/controllers/cluster.py b/src/web/controllers/cluster.py
index 5ad934225..ca00f118a 100644
--- a/src/web/controllers/cluster.py
+++ b/src/web/controllers/cluster.py
@@ -1,15 +1,17 @@
 import logging
 from datetime import timedelta

-from bootstrap import db, SQLITE_ENGINE
-from sqlalchemy import func, Integer, and_
-from sqlalchemy.orm import aliased
-from sqlalchemy.sql import select, exists
+from sqlalchemy import Integer, and_, func
 from sqlalchemy.dialects.postgres import ARRAY
+from sqlalchemy.orm import aliased
+from sqlalchemy.sql import exists, select
 from werkzeug.exceptions import NotFound
-from .abstract import AbstractController
-from web.models import Cluster, Article
+
+from bootstrap import SQLITE_ENGINE, db
 from web.controllers.article import ArticleController
+from web.models import Article, Cluster
+
+from .abstract import AbstractController

 logger = logging.getLogger(__name__)

diff --git a/src/web/controllers/feed.py b/src/web/controllers/feed.py
index cc00e510c..65058df7e 100644
--- a/src/web/controllers/feed.py
+++ b/src/web/controllers/feed.py
@@ -1,12 +1,13 @@
 import logging
-from sqlalchemy import and_
-from sqlalchemy.sql import select, update, delete
 from datetime import datetime, timedelta

-from bootstrap import db, conf, SQLITE_ENGINE
+from sqlalchemy import and_
+from sqlalchemy.sql import delete, select, update
+
+from bootstrap import SQLITE_ENGINE, conf, db
 from web.controllers.abstract import AbstractController
 from web.controllers.icon import IconController
-from web.models import User, Feed, Article, Cluster
+from web.models import Article, Cluster, Feed, User

 logger = logging.getLogger(__name__)
 DEFAULT_LIMIT = 5

diff --git a/src/web/controllers/icon.py b/src/web/controllers/icon.py
index 149a69b1b..1c0d47624 100644
--- a/src/web/controllers/icon.py
+++ b/src/web/controllers/icon.py
@@ -1,7 +1,9 @@
 import base64
+
 from bootstrap import db
 from lib.utils import jarr_get
 from web.models import Icon
+
 from .abstract import AbstractController

diff --git a/src/web/controllers/user.py b/src/web/controllers/user.py
index 8e70e7a46..97c98e482 100644
--- a/src/web/controllers/user.py
+++ b/src/web/controllers/user.py
@@ -1,8 +1,11 @@
 import logging
-from werkzeug import generate_password_hash, check_password_hash
-from .abstract import AbstractController
+
+from werkzeug import check_password_hash, generate_password_hash
+
 from web.models import User

+from .abstract import AbstractController
+
 logger = logging.getLogger(__name__)

diff --git a/src/web/forms.py b/src/web/forms.py
index 1071ea07f..cfb2c9b07 100644
--- a/src/web/forms.py
+++ b/src/web/forms.py
@@ -1,13 +1,15 @@
-from flask_wtf import Form
-from flask import url_for, redirect
+from flask import redirect, url_for
 from flask_babel import lazy_gettext
-from werkzeug.exceptions import NotFound
-from wtforms import TextField, PasswordField, BooleanField, \
-    SubmitField, SelectField, validators, HiddenField
+from flask_wtf import Form
 from flask_wtf.html5 import EmailField
+from werkzeug.exceptions import NotFound
+from wtforms import (BooleanField, HiddenField, PasswordField, SelectField,
+                     SubmitField, TextField, validators)

 from web import utils
 from web.controllers import UserController
+
+
 # from flask_wtf import RecaptchaField

diff --git a/src/web/lib/article_cleaner.py b/src/web/lib/article_cleaner.py
index 9e4e9c09d..6cd484fa4 100644
--- a/src/web/lib/article_cleaner.py
+++ b/src/web/lib/article_cleaner.py
@@ -1,5 +1,7 @@
-from urllib.parse import unquote, urlparse, urlunparse, ParseResult
+from urllib.parse import ParseResult, unquote, urlparse, urlunparse
+
 from bs4 import BeautifulSoup
+
 from bootstrap import is_secure_served

 HTTPS_IFRAME_DOMAINS = ('vimeo.com', 'youtube.com', 'youtu.be')

diff --git a/src/web/lib/view_utils.py b/src/web/lib/view_utils.py
index 642470fce..0a7d675d2 100644
--- a/src/web/lib/view_utils.py
+++ b/src/web/lib/view_utils.py
@@ -1,11 +1,13 @@
-import pytz
-from functools import wraps
 from datetime import datetime
-from flask import request, Response, make_response, get_flashed_messages
-from flask_babel import get_locale
+from functools import wraps
+
+import pytz
 from babel.dates import format_datetime, format_timedelta
-from web.views.common import jsonify
+from flask import Response, get_flashed_messages, make_response, request
+from flask_babel import get_locale
+
 from lib.utils import to_hash
+from web.views.common import jsonify

 ACCEPTED_LEVELS = {'success', 'info', 'warning', 'error'}

diff --git a/src/web/models/article.py b/src/web/models/article.py
index 0e28b0068..9a61c4a4d 100644
--- a/src/web/models/article.py
+++ b/src/web/models/article.py
@@ -1,8 +1,10 @@
 from datetime import datetime
-from sqlalchemy import (Column, Index, ForeignKey,
-                        Integer, String, Boolean, DateTime)
-from sqlalchemy.orm import relationship
+
+from sqlalchemy import (Boolean, Column, DateTime, ForeignKey, Index, Integer,
+                        String)
 from sqlalchemy.ext.associationproxy import association_proxy
+from sqlalchemy.orm import relationship
+
 from bootstrap import db
 from web.models.right_mixin import RightMixin

diff --git a/src/web/models/category.py b/src/web/models/category.py
index c9ab0ade0..4fe6846be 100644
--- a/src/web/models/category.py
+++ b/src/web/models/category.py
@@ -1,6 +1,6 @@
-from sqlalchemy import (Index, Column, ForeignKey,
-                        Integer, String, Boolean)
+from sqlalchemy import Boolean, Column, ForeignKey, Index, Integer, String
 from sqlalchemy.orm import relationship
+
 from bootstrap import db
 from web.models.right_mixin import RightMixin

diff --git a/src/web/models/cluster.py b/src/web/models/cluster.py
index a13c1ed39..8d432f3fb 100644
--- a/src/web/models/cluster.py
+++ b/src/web/models/cluster.py
@@ -1,7 +1,9 @@
 from datetime import datetime
-from sqlalchemy import (Column, ForeignKey, Index,
-                        Boolean, String, Integer, DateTime)
+
+from sqlalchemy import (Boolean, Column, DateTime, ForeignKey, Index, Integer,
+                        String)
 from sqlalchemy.orm import relationship
+
 from bootstrap import db
 from web.models.article import Article
 from web.models.right_mixin import RightMixin

diff --git a/src/web/models/feed.py b/src/web/models/feed.py
index da47b21a9..cb87d2d88 100644
--- a/src/web/models/feed.py
+++ b/src/web/models/feed.py
@@ -1,7 +1,9 @@
 from datetime import datetime
-from sqlalchemy import (Index, Column, ForeignKey,
-                        String, Boolean, Integer, DateTime, PickleType)
+
+from sqlalchemy import (Boolean, Column, DateTime, ForeignKey, Index, Integer,
+                        PickleType, String)
 from sqlalchemy.orm import relationship, validates
+
 from bootstrap import db
 from web.models.right_mixin import RightMixin

diff --git a/src/web/models/icon.py b/src/web/models/icon.py
index dd257b592..183aefe0e 100644
--- a/src/web/models/icon.py
+++ b/src/web/models/icon.py
@@ -1,7 +1,8 @@
-from bootstrap import db
 from sqlalchemy import Column, String
 from sqlalchemy.orm import relationship

+from bootstrap import db
+

 class Icon(db.Model):
     url = Column(String, primary_key=True)

diff --git a/src/web/models/tag.py b/src/web/models/tag.py
index a56e00a85..b346581e4 100644
--- a/src/web/models/tag.py
+++ b/src/web/models/tag.py
@@ -1,5 +1,6 @@
 from sqlalchemy import Column, ForeignKey, Integer, String
 from sqlalchemy.orm import relationship
+
 from bootstrap import db

diff --git a/src/web/models/user.py b/src/web/models/user.py
index 0d567821c..9bae3e40c 100644
--- a/src/web/models/user.py
+++ b/src/web/models/user.py
@@ -1,8 +1,9 @@
 import re
 from datetime import datetime
-from sqlalchemy import Column, String, Boolean, Integer, DateTime
-from sqlalchemy.orm import validates, relationship
+
 from flask_login import UserMixin
+from sqlalchemy import Boolean, Column, DateTime, Integer, String
+from sqlalchemy.orm import relationship, validates

 from bootstrap import db
 from web.models.right_mixin import RightMixin

diff --git a/src/web/utils.py b/src/web/utils.py
index 968f57fb8..49f0a43b5 100755
--- a/src/web/utils.py
+++ b/src/web/utils.py
@@ -6,7 +6,7 @@
 import logging
 from collections import Counter
-from urllib.parse import urlparse, urljoin
+from urllib.parse import urljoin, urlparse

 import sqlalchemy
 from flask import request

diff --git a/src/web/views/admin.py b/src/web/views/admin.py
index baa2b3d4b..c0853219d 100644
--- a/src/web/views/admin.py
+++ b/src/web/views/admin.py
@@ -1,14 +1,14 @@
 import logging
-from sqlalchemy import desc
 from datetime import datetime
-from flask import (Blueprint, render_template, redirect,
-                   flash, url_for, request)
-from flask_babel import gettext, format_timedelta
-from flask_login import login_required, current_user
+
+from flask import Blueprint, flash, redirect, render_template, request, url_for
+from flask_babel import format_timedelta, gettext
+from flask_login import current_user, login_required
+from sqlalchemy import desc

 from lib.utils import redirect_url
+from web.controllers import ClusterController, FeedController, UserController
 from web.views.common import admin_permission
-from web.controllers import UserController, FeedController, ClusterController

 logger = logging.getLogger(__name__)
 admin_bp = Blueprint('admin', __name__, url_prefix='/admin')

diff --git a/src/web/views/api/article.py b/src/web/views/api/article.py
index 6e895e127..38cfd7143 100644
--- a/src/web/views/api/article.py
+++ b/src/web/views/api/article.py
@@ -1,13 +1,14 @@
-from bootstrap import conf
-import dateutil.parser
 from datetime import datetime
+
+import dateutil.parser
 from flask import current_app
 from flask_restful import Api

-from web.views.common import api_permission
+from bootstrap import conf
 from web.controllers import ArticleController
-from web.views.api.common import (PyAggAbstractResource,
-        PyAggResourceNew, PyAggResourceExisting, PyAggResourceMulti)
+from web.views.api.common import (PyAggAbstractResource, PyAggResourceExisting,
+                                  PyAggResourceMulti, PyAggResourceNew)
+from web.views.common import api_permission


 class ArticleNewAPI(PyAggResourceNew):

diff --git a/src/web/views/api/category.py b/src/web/views/api/category.py
index 5d23bbf8d..4e0ab8f90 100644
--- a/src/web/views/api/category.py
+++ b/src/web/views/api/category.py
@@ -1,11 +1,10 @@
-from bootstrap import conf
 from flask import current_app
 from flask_restful import Api

+from bootstrap import conf
 from web.controllers.category import CategoryController
-from web.views.api.common import (PyAggResourceNew,
-                                  PyAggResourceExisting,
-                                  PyAggResourceMulti)
+from web.views.api.common import (PyAggResourceExisting, PyAggResourceMulti,
+                                  PyAggResourceNew)


 class CategoryNewAPI(PyAggResourceNew):

diff --git a/src/web/views/api/cluster.py b/src/web/views/api/cluster.py
index 3af2416e1..d3661d3b2 100644
--- a/src/web/views/api/cluster.py
+++ b/src/web/views/api/cluster.py
@@ -3,8 +3,8 @@

 from bootstrap import conf
 from web.controllers import ClusterController
-from web.views.api.common import (
-        PyAggResourceNew, PyAggResourceExisting, PyAggResourceMulti)
+from web.views.api.common import (PyAggResourceExisting, PyAggResourceMulti,
+                                  PyAggResourceNew)


 class ClusterNewAPI(PyAggResourceNew):

diff --git a/src/web/views/api/common.py b/src/web/views/api/common.py
index 4e0cfc8df..5761ebffc 100644
--- a/src/web/views/api/common.py
+++ b/src/web/views/api/common.py
@@ -20,14 +20,15 @@
 """
 import logging
 from functools import wraps
-from werkzeug.exceptions import Unauthorized, BadRequest, Forbidden, NotFound
+
 from flask import request
-from flask_restful import Resource, reqparse
 from flask_login import current_user
+from flask_restful import Resource, reqparse
+from werkzeug.exceptions import BadRequest, Forbidden, NotFound, Unauthorized

-from web.views.common import admin_permission, api_permission, \
-        login_user_bundle, jsonify
 from web.controllers import UserController
+from web.views.common import (admin_permission, api_permission, jsonify,
+                              login_user_bundle)

 logger = logging.getLogger(__name__)

diff --git a/src/web/views/api/feed.py b/src/web/views/api/feed.py
index 254d9401a..2a7564933 100644
--- a/src/web/views/api/feed.py
+++ b/src/web/views/api/feed.py
@@ -1,14 +1,11 @@
-from bootstrap import conf
 from flask import current_app
 from flask_restful import Api

+from bootstrap import conf
+from web.controllers.feed import DEFAULT_LIMIT, FeedController
+from web.views.api.common import (PyAggAbstractResource, PyAggResourceExisting,
+                                  PyAggResourceMulti, PyAggResourceNew)
 from web.views.common import api_permission
-from web.controllers.feed import FeedController, DEFAULT_LIMIT
-
-from web.views.api.common import PyAggAbstractResource, \
-                                 PyAggResourceNew, \
-                                 PyAggResourceExisting, \
-                                 PyAggResourceMulti


 class FeedNewAPI(PyAggResourceNew):

diff --git a/src/web/views/article.py b/src/web/views/article.py
index d40e3db46..f48ee97fd 100644
--- a/src/web/views/article.py
+++ b/src/web/views/article.py
@@ -1,5 +1,5 @@
 from flask import Blueprint, render_template
-from flask_login import login_required, current_user
+from flask_login import current_user, login_required

 from web.controllers import ArticleController

diff --git a/src/web/views/cluster.py b/src/web/views/cluster.py
index 80fe264a3..9f140f71f 100644
--- a/src/web/views/cluster.py
+++ b/src/web/views/cluster.py
@@ -1,5 +1,6 @@
 from flask import Blueprint, redirect
-from flask_login import login_required, current_user
+from flask_login import current_user, login_required
+
 from web.controllers import ClusterController

 cluster_bp = Blueprint('cluster', __name__, url_prefix='/cluster')

diff --git a/src/web/views/common.py b/src/web/views/common.py
index d7251579f..2034a62a8 100644
--- a/src/web/views/common.py
+++ b/src/web/views/common.py
@@ -1,9 +1,11 @@
 import json
 from functools import wraps
-from flask import current_app, Response
+
+from flask import Response, current_app
 from flask_login import login_user
-from flask_principal import (Identity, Permission, RoleNeed,
-                             session_identity_loader, identity_changed)
+from flask_principal import (Identity, Permission, RoleNeed, identity_changed,
+                             session_identity_loader)
+
 from lib.utils import default_handler

 admin_role = RoleNeed('admin')

diff --git a/src/web/views/feed.py b/src/web/views/feed.py
index b40eb9dcc..f67456953 100644
--- a/src/web/views/feed.py
+++ b/src/web/views/feed.py
@@ -1,14 +1,13 @@
 import logging

-from werkzeug.exceptions import BadRequest
-from flask import Blueprint, render_template, flash, \
-        redirect, request, url_for
+from flask import Blueprint, flash, redirect, render_template, request, url_for
 from flask_babel import gettext
-from flask_login import login_required, current_user
+from flask_login import current_user, login_required
+from werkzeug.exceptions import BadRequest

-from web.lib.view_utils import etag_match
 from lib.feed_utils import construct_feed_from
-from web.controllers import FeedController, ClusterController
+from web.controllers import ClusterController, FeedController
+from web.lib.view_utils import etag_match

 logger = logging.getLogger(__name__)
 feeds_bp = Blueprint('feeds', __name__, url_prefix='/feeds')

diff --git a/src/web/views/home.py b/src/web/views/home.py
index 2f2583386..136ee3e93 100644
--- a/src/web/views/home.py
+++ b/src/web/views/home.py
@@ -1,23 +1,20 @@
-import pytz
 import logging
 from datetime import datetime

-from flask import current_app, render_template, request, flash, url_for
-from flask_login import login_required, current_user
-from flask_babel import get_locale
+import pytz
 from babel.dates import format_datetime, format_timedelta
+from flask import current_app, flash, render_template, request, url_for
+from flask_babel import get_locale
+from flask_login import current_user, login_required

 from bootstrap import conf
+from plugins import readability
+from web.controllers import (ArticleController, CategoryController,
+                             ClusterController, FeedController, UserController)
 from web.lib.article_cleaner import clean_urls
-from web.lib.view_utils import etag_match, clusters_to_json, get_notifications
+from web.lib.view_utils import clusters_to_json, etag_match, get_notifications
 from web.views.common import jsonify
-from web.controllers import (UserController, CategoryController,
-                             FeedController, ArticleController,
-                             ClusterController)
-
-from plugins import readability
-

 localize = pytz.utc.localize
 logger = logging.getLogger(__name__)

diff --git a/src/web/views/icon.py b/src/web/views/icon.py
index fd41513b6..892c1de46 100644
--- a/src/web/views/icon.py
+++ b/src/web/views/icon.py
@@ -1,5 +1,7 @@
 import base64
+
 from flask import Blueprint, Response, request
+
 from web.controllers import IconController
 from web.lib.view_utils import etag_match

diff --git a/src/web/views/session_mgmt.py b/src/web/views/session_mgmt.py
index fae7a5aac..4ffb432c3 100644
--- a/src/web/views/session_mgmt.py
+++ b/src/web/views/session_mgmt.py
@@ -1,21 +1,20 @@
 import json
 import logging

-from werkzeug.exceptions import NotFound
-from flask import (render_template, flash, session, request,
-                   url_for, redirect, current_app)
+from flask import (current_app, flash, redirect, render_template, request,
+                   session, url_for)
 from flask_babel import gettext
-from flask_login import LoginManager, logout_user, \
-        login_required, current_user
-from flask_principal import (Principal, AnonymousIdentity, UserNeed,
+from flask_login import LoginManager, current_user, login_required, logout_user
+from flask_principal import (AnonymousIdentity, Principal, UserNeed,
                              identity_changed, identity_loaded,
                              session_identity_loader)
+from rauth import OAuth1Service, OAuth2Service
+from werkzeug.exceptions import NotFound

 from bootstrap import conf
-from web.views.common import admin_role, api_role, login_user_bundle
 from web.controllers import UserController
-from web.forms import SignupForm, SigninForm
-from rauth import OAuth1Service, OAuth2Service
+from web.forms import SigninForm, SignupForm
+from web.views.common import admin_role, api_role, login_user_bundle

 Principal(current_app)
 # Create a permission with a single Need, in this case a RoleNeed.

diff --git a/src/web/views/user.py b/src/web/views/user.py
index dae1222ef..7875c5755 100644
--- a/src/web/views/user.py
+++ b/src/web/views/user.py
@@ -1,20 +1,19 @@
-import opml
 import random
 from datetime import datetime
-from werkzeug.exceptions import NotFound, Forbidden
-from flask import (Blueprint, render_template, redirect,
-                   flash, url_for, request, make_response)
-from flask_principal import Permission, UserNeed
+
+import opml
+from flask import (Blueprint, flash, make_response, redirect, render_template,
+                   request, url_for)
 from flask_babel import gettext
 from flask_login import current_user, login_required, logout_user
+from flask_principal import Permission, UserNeed
+from werkzeug.exceptions import Forbidden, NotFound

 from bootstrap import conf
 from lib import emails
+from web.controllers import CategoryController, FeedController, UserController
+from web.forms import PasswordModForm, ProfileForm, RecoverPasswordForm
 from web.views.common import admin_permission, login_user_bundle
-from web.controllers import (UserController, CategoryController,
-                             FeedController)
-
-from web.forms import ProfileForm, PasswordModForm, RecoverPasswordForm

 users_bp = Blueprint('users', __name__, url_prefix='/users')
 user_bp = Blueprint('user', __name__, url_prefix='/user')

diff --git a/src/web/views/views.py b/src/web/views/views.py
index cf8787eb1..db50cb7bf 100644
--- a/src/web/views/views.py
+++ b/src/web/views/views.py
@@ -1,6 +1,7 @@
 import logging
-from flask import (request, render_template, flash,
-                   url_for, redirect, current_app)
+
+from flask import (current_app, flash, redirect, render_template, request,
+                   url_for)
 from flask_babel import gettext

 from bootstrap import conf
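[Editor's note: the commit above reorders every import block into the conventional three groups (standard library, then third-party packages, then first-party code), each group alphabetized and separated by a blank line, matching what a tool such as isort produces. A small runnable sketch of that grouping rule; the package sets below are illustrative, not taken from the project:

    STDLIB = {'json', 'logging', 'datetime'}
    THIRD_PARTY = {'flask', 'sqlalchemy', 'feedparser'}

    def import_group(module):
        root = module.split('.')[0]
        if root in STDLIB:
            return 0  # standard library first
        if root in THIRD_PARTY:
            return 1  # then installed packages
        return 2      # then project code (bootstrap, web, lib, ...)

    modules = ['web.controllers', 'flask', 'logging', 'bootstrap']
    print(sorted(modules, key=lambda mod: (import_group(mod), mod)))
    # ['logging', 'flask', 'bootstrap', 'web.controllers']
]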
- * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) - */ - -if (typeof jQuery === 'undefined') { - throw new Error('Bootstrap\'s JavaScript requires jQuery') -} - -+function ($) { - 'use strict'; - var version = $.fn.jquery.split(' ')[0].split('.') - if ((version[0] < 2 && version[1] < 9) || (version[0] == 1 && version[1] == 9 && version[2] < 1)) { - throw new Error('Bootstrap\'s JavaScript requires jQuery version 1.9.1 or higher') - } -}(jQuery); - -/* ======================================================================== - * Bootstrap: transition.js v3.3.4 - * http://getbootstrap.com/javascript/#transitions - * ======================================================================== - * Copyright 2011-2015 Twitter, Inc. - * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) - * ======================================================================== */ - - -+function ($) { - 'use strict'; - - // CSS TRANSITION SUPPORT (Shoutout: http://www.modernizr.com/) - // ============================================================ - - function transitionEnd() { - var el = document.createElement('bootstrap') - - var transEndEventNames = { - WebkitTransition : 'webkitTransitionEnd', - MozTransition : 'transitionend', - OTransition : 'oTransitionEnd otransitionend', - transition : 'transitionend' - } - - for (var name in transEndEventNames) { - if (el.style[name] !== undefined) { - return { end: transEndEventNames[name] } - } - } - - return false // explicit for ie8 ( ._.) - } - - // http://blog.alexmaccaw.com/css-transitions - $.fn.emulateTransitionEnd = function (duration) { - var called = false - var $el = this - $(this).one('bsTransitionEnd', function () { called = true }) - var callback = function () { if (!called) $($el).trigger($.support.transition.end) } - setTimeout(callback, duration) - return this - } - - $(function () { - $.support.transition = transitionEnd() - - if (!$.support.transition) return - - $.event.special.bsTransitionEnd = { - bindType: $.support.transition.end, - delegateType: $.support.transition.end, - handle: function (e) { - if ($(e.target).is(this)) return e.handleObj.handler.apply(this, arguments) - } - } - }) - -}(jQuery); - -/* ======================================================================== - * Bootstrap: alert.js v3.3.4 - * http://getbootstrap.com/javascript/#alerts - * ======================================================================== - * Copyright 2011-2015 Twitter, Inc. 
- * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) - * ======================================================================== */ - - -+function ($) { - 'use strict'; - - // ALERT CLASS DEFINITION - // ====================== - - var dismiss = '[data-dismiss="alert"]' - var Alert = function (el) { - $(el).on('click', dismiss, this.close) - } - - Alert.VERSION = '3.3.4' - - Alert.TRANSITION_DURATION = 150 - - Alert.prototype.close = function (e) { - var $this = $(this) - var selector = $this.attr('data-target') - - if (!selector) { - selector = $this.attr('href') - selector = selector && selector.replace(/.*(?=#[^\s]*$)/, '') // strip for ie7 - } - - var $parent = $(selector) - - if (e) e.preventDefault() - - if (!$parent.length) { - $parent = $this.closest('.alert') - } - - $parent.trigger(e = $.Event('close.bs.alert')) - - if (e.isDefaultPrevented()) return - - $parent.removeClass('in') - - function removeElement() { - // detach from parent, fire event then clean up data - $parent.detach().trigger('closed.bs.alert').remove() - } - - $.support.transition && $parent.hasClass('fade') ? - $parent - .one('bsTransitionEnd', removeElement) - .emulateTransitionEnd(Alert.TRANSITION_DURATION) : - removeElement() - } - - - // ALERT PLUGIN DEFINITION - // ======================= - - function Plugin(option) { - return this.each(function () { - var $this = $(this) - var data = $this.data('bs.alert') - - if (!data) $this.data('bs.alert', (data = new Alert(this))) - if (typeof option == 'string') data[option].call($this) - }) - } - - var old = $.fn.alert - - $.fn.alert = Plugin - $.fn.alert.Constructor = Alert - - - // ALERT NO CONFLICT - // ================= - - $.fn.alert.noConflict = function () { - $.fn.alert = old - return this - } - - - // ALERT DATA-API - // ============== - - $(document).on('click.bs.alert.data-api', dismiss, Alert.prototype.close) - -}(jQuery); - -/* ======================================================================== - * Bootstrap: button.js v3.3.4 - * http://getbootstrap.com/javascript/#buttons - * ======================================================================== - * Copyright 2011-2015 Twitter, Inc. - * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) - * ======================================================================== */ - - -+function ($) { - 'use strict'; - - // BUTTON PUBLIC CLASS DEFINITION - // ============================== - - var Button = function (element, options) { - this.$element = $(element) - this.options = $.extend({}, Button.DEFAULTS, options) - this.isLoading = false - } - - Button.VERSION = '3.3.4' - - Button.DEFAULTS = { - loadingText: 'loading...' - } - - Button.prototype.setState = function (state) { - var d = 'disabled' - var $el = this.$element - var val = $el.is('input') ? 'val' : 'html' - var data = $el.data() - - state = state + 'Text' - - if (data.resetText == null) $el.data('resetText', $el[val]()) - - // push to event loop to allow forms to submit - setTimeout($.proxy(function () { - $el[val](data[state] == null ? 
this.options[state] : data[state]) - - if (state == 'loadingText') { - this.isLoading = true - $el.addClass(d).attr(d, d) - } else if (this.isLoading) { - this.isLoading = false - $el.removeClass(d).removeAttr(d) - } - }, this), 0) - } - - Button.prototype.toggle = function () { - var changed = true - var $parent = this.$element.closest('[data-toggle="buttons"]') - - if ($parent.length) { - var $input = this.$element.find('input') - if ($input.prop('type') == 'radio') { - if ($input.prop('checked') && this.$element.hasClass('active')) changed = false - else $parent.find('.active').removeClass('active') - } - if (changed) $input.prop('checked', !this.$element.hasClass('active')).trigger('change') - } else { - this.$element.attr('aria-pressed', !this.$element.hasClass('active')) - } - - if (changed) this.$element.toggleClass('active') - } - - - // BUTTON PLUGIN DEFINITION - // ======================== - - function Plugin(option) { - return this.each(function () { - var $this = $(this) - var data = $this.data('bs.button') - var options = typeof option == 'object' && option - - if (!data) $this.data('bs.button', (data = new Button(this, options))) - - if (option == 'toggle') data.toggle() - else if (option) data.setState(option) - }) - } - - var old = $.fn.button - - $.fn.button = Plugin - $.fn.button.Constructor = Button - - - // BUTTON NO CONFLICT - // ================== - - $.fn.button.noConflict = function () { - $.fn.button = old - return this - } - - - // BUTTON DATA-API - // =============== - - $(document) - .on('click.bs.button.data-api', '[data-toggle^="button"]', function (e) { - var $btn = $(e.target) - if (!$btn.hasClass('btn')) $btn = $btn.closest('.btn') - Plugin.call($btn, 'toggle') - e.preventDefault() - }) - .on('focus.bs.button.data-api blur.bs.button.data-api', '[data-toggle^="button"]', function (e) { - $(e.target).closest('.btn').toggleClass('focus', /^focus(in)?$/.test(e.type)) - }) - -}(jQuery); - -/* ======================================================================== - * Bootstrap: carousel.js v3.3.4 - * http://getbootstrap.com/javascript/#carousel - * ======================================================================== - * Copyright 2011-2015 Twitter, Inc. 
- * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) - * ======================================================================== */ - - -+function ($) { - 'use strict'; - - // CAROUSEL CLASS DEFINITION - // ========================= - - var Carousel = function (element, options) { - this.$element = $(element) - this.$indicators = this.$element.find('.carousel-indicators') - this.options = options - this.paused = null - this.sliding = null - this.interval = null - this.$active = null - this.$items = null - - this.options.keyboard && this.$element.on('keydown.bs.carousel', $.proxy(this.keydown, this)) - - this.options.pause == 'hover' && !('ontouchstart' in document.documentElement) && this.$element - .on('mouseenter.bs.carousel', $.proxy(this.pause, this)) - .on('mouseleave.bs.carousel', $.proxy(this.cycle, this)) - } - - Carousel.VERSION = '3.3.4' - - Carousel.TRANSITION_DURATION = 600 - - Carousel.DEFAULTS = { - interval: 5000, - pause: 'hover', - wrap: true, - keyboard: true - } - - Carousel.prototype.keydown = function (e) { - if (/input|textarea/i.test(e.target.tagName)) return - switch (e.which) { - case 37: this.prev(); break - case 39: this.next(); break - default: return - } - - e.preventDefault() - } - - Carousel.prototype.cycle = function (e) { - e || (this.paused = false) - - this.interval && clearInterval(this.interval) - - this.options.interval - && !this.paused - && (this.interval = setInterval($.proxy(this.next, this), this.options.interval)) - - return this - } - - Carousel.prototype.getItemIndex = function (item) { - this.$items = item.parent().children('.item') - return this.$items.index(item || this.$active) - } - - Carousel.prototype.getItemForDirection = function (direction, active) { - var activeIndex = this.getItemIndex(active) - var willWrap = (direction == 'prev' && activeIndex === 0) - || (direction == 'next' && activeIndex == (this.$items.length - 1)) - if (willWrap && !this.options.wrap) return active - var delta = direction == 'prev' ? -1 : 1 - var itemIndex = (activeIndex + delta) % this.$items.length - return this.$items.eq(itemIndex) - } - - Carousel.prototype.to = function (pos) { - var that = this - var activeIndex = this.getItemIndex(this.$active = this.$element.find('.item.active')) - - if (pos > (this.$items.length - 1) || pos < 0) return - - if (this.sliding) return this.$element.one('slid.bs.carousel', function () { that.to(pos) }) // yes, "slid" - if (activeIndex == pos) return this.pause().cycle() - - return this.slide(pos > activeIndex ? 'next' : 'prev', this.$items.eq(pos)) - } - - Carousel.prototype.pause = function (e) { - e || (this.paused = true) - - if (this.$element.find('.next, .prev').length && $.support.transition) { - this.$element.trigger($.support.transition.end) - this.cycle(true) - } - - this.interval = clearInterval(this.interval) - - return this - } - - Carousel.prototype.next = function () { - if (this.sliding) return - return this.slide('next') - } - - Carousel.prototype.prev = function () { - if (this.sliding) return - return this.slide('prev') - } - - Carousel.prototype.slide = function (type, next) { - var $active = this.$element.find('.item.active') - var $next = next || this.getItemForDirection(type, $active) - var isCycling = this.interval - var direction = type == 'next' ? 
'left' : 'right' - var that = this - - if ($next.hasClass('active')) return (this.sliding = false) - - var relatedTarget = $next[0] - var slideEvent = $.Event('slide.bs.carousel', { - relatedTarget: relatedTarget, - direction: direction - }) - this.$element.trigger(slideEvent) - if (slideEvent.isDefaultPrevented()) return - - this.sliding = true - - isCycling && this.pause() - - if (this.$indicators.length) { - this.$indicators.find('.active').removeClass('active') - var $nextIndicator = $(this.$indicators.children()[this.getItemIndex($next)]) - $nextIndicator && $nextIndicator.addClass('active') - } - - var slidEvent = $.Event('slid.bs.carousel', { relatedTarget: relatedTarget, direction: direction }) // yes, "slid" - if ($.support.transition && this.$element.hasClass('slide')) { - $next.addClass(type) - $next[0].offsetWidth // force reflow - $active.addClass(direction) - $next.addClass(direction) - $active - .one('bsTransitionEnd', function () { - $next.removeClass([type, direction].join(' ')).addClass('active') - $active.removeClass(['active', direction].join(' ')) - that.sliding = false - setTimeout(function () { - that.$element.trigger(slidEvent) - }, 0) - }) - .emulateTransitionEnd(Carousel.TRANSITION_DURATION) - } else { - $active.removeClass('active') - $next.addClass('active') - this.sliding = false - this.$element.trigger(slidEvent) - } - - isCycling && this.cycle() - - return this - } - - - // CAROUSEL PLUGIN DEFINITION - // ========================== - - function Plugin(option) { - return this.each(function () { - var $this = $(this) - var data = $this.data('bs.carousel') - var options = $.extend({}, Carousel.DEFAULTS, $this.data(), typeof option == 'object' && option) - var action = typeof option == 'string' ? option : options.slide - - if (!data) $this.data('bs.carousel', (data = new Carousel(this, options))) - if (typeof option == 'number') data.to(option) - else if (action) data[action]() - else if (options.interval) data.pause().cycle() - }) - } - - var old = $.fn.carousel - - $.fn.carousel = Plugin - $.fn.carousel.Constructor = Carousel - - - // CAROUSEL NO CONFLICT - // ==================== - - $.fn.carousel.noConflict = function () { - $.fn.carousel = old - return this - } - - - // CAROUSEL DATA-API - // ================= - - var clickHandler = function (e) { - var href - var $this = $(this) - var $target = $($this.attr('data-target') || (href = $this.attr('href')) && href.replace(/.*(?=#[^\s]+$)/, '')) // strip for ie7 - if (!$target.hasClass('carousel')) return - var options = $.extend({}, $target.data(), $this.data()) - var slideIndex = $this.attr('data-slide-to') - if (slideIndex) options.interval = false - - Plugin.call($target, options) - - if (slideIndex) { - $target.data('bs.carousel').to(slideIndex) - } - - e.preventDefault() - } - - $(document) - .on('click.bs.carousel.data-api', '[data-slide]', clickHandler) - .on('click.bs.carousel.data-api', '[data-slide-to]', clickHandler) - - $(window).on('load', function () { - $('[data-ride="carousel"]').each(function () { - var $carousel = $(this) - Plugin.call($carousel, $carousel.data()) - }) - }) - -}(jQuery); - -/* ======================================================================== - * Bootstrap: collapse.js v3.3.4 - * http://getbootstrap.com/javascript/#collapse - * ======================================================================== - * Copyright 2011-2015 Twitter, Inc. 
- * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
- * ======================================================================== */
-
-
-+function ($) {
-  'use strict';
-
-  // COLLAPSE PUBLIC CLASS DEFINITION
-  // ================================
-
-  var Collapse = function (element, options) {
-    this.$element = $(element)
-    this.options = $.extend({}, Collapse.DEFAULTS, options)
-    this.$trigger = $('[data-toggle="collapse"][href="#' + element.id + '"],' +
-                      '[data-toggle="collapse"][data-target="#' + element.id + '"]')
-    this.transitioning = null
-
-    if (this.options.parent) {
-      this.$parent = this.getParent()
-    } else {
-      this.addAriaAndCollapsedClass(this.$element, this.$trigger)
-    }
-
-    if (this.options.toggle) this.toggle()
-  }
-
-  Collapse.VERSION = '3.3.4'
-
-  Collapse.TRANSITION_DURATION = 350
-
-  Collapse.DEFAULTS = {
-    toggle: true
-  }
-
-  Collapse.prototype.dimension = function () {
-    var hasWidth = this.$element.hasClass('width')
-    return hasWidth ? 'width' : 'height'
-  }
-
-  Collapse.prototype.show = function () {
-    if (this.transitioning || this.$element.hasClass('in')) return
-
-    var activesData
-    var actives = this.$parent && this.$parent.children('.panel').children('.in, .collapsing')
-
-    if (actives && actives.length) {
-      activesData = actives.data('bs.collapse')
-      if (activesData && activesData.transitioning) return
-    }
-
-    var startEvent = $.Event('show.bs.collapse')
-    this.$element.trigger(startEvent)
-    if (startEvent.isDefaultPrevented()) return
-
-    if (actives && actives.length) {
-      Plugin.call(actives, 'hide')
-      activesData || actives.data('bs.collapse', null)
-    }
-
-    var dimension = this.dimension()
-
-    this.$element
-      .removeClass('collapse')
-      .addClass('collapsing')[dimension](0)
-      .attr('aria-expanded', true)
-
-    this.$trigger
-      .removeClass('collapsed')
-      .attr('aria-expanded', true)
-
-    this.transitioning = 1
-
-    var complete = function () {
-      this.$element
-        .removeClass('collapsing')
-        .addClass('collapse in')[dimension]('')
-      this.transitioning = 0
-      this.$element
-        .trigger('shown.bs.collapse')
-    }
-
-    if (!$.support.transition) return complete.call(this)
-
-    var scrollSize = $.camelCase(['scroll', dimension].join('-'))
-
-    this.$element
-      .one('bsTransitionEnd', $.proxy(complete, this))
-      .emulateTransitionEnd(Collapse.TRANSITION_DURATION)[dimension](this.$element[0][scrollSize])
-  }
-
-  Collapse.prototype.hide = function () {
-    if (this.transitioning || !this.$element.hasClass('in')) return
-
-    var startEvent = $.Event('hide.bs.collapse')
-    this.$element.trigger(startEvent)
-    if (startEvent.isDefaultPrevented()) return
-
-    var dimension = this.dimension()
-
-    this.$element[dimension](this.$element[dimension]())[0].offsetHeight
-
-    this.$element
-      .addClass('collapsing')
-      .removeClass('collapse in')
-      .attr('aria-expanded', false)
-
-    this.$trigger
-      .addClass('collapsed')
-      .attr('aria-expanded', false)
-
-    this.transitioning = 1
-
-    var complete = function () {
-      this.transitioning = 0
-      this.$element
-        .removeClass('collapsing')
-        .addClass('collapse')
-        .trigger('hidden.bs.collapse')
-    }
-
-    if (!$.support.transition) return complete.call(this)
-
-    this.$element
-      [dimension](0)
-      .one('bsTransitionEnd', $.proxy(complete, this))
-      .emulateTransitionEnd(Collapse.TRANSITION_DURATION)
-  }
-
-  Collapse.prototype.toggle = function () {
-    this[this.$element.hasClass('in') ? 'hide' : 'show']()
-  }
-
-  Collapse.prototype.getParent = function () {
-    return $(this.options.parent)
-      .find('[data-toggle="collapse"][data-parent="' + this.options.parent + '"]')
-      .each($.proxy(function (i, element) {
-        var $element = $(element)
-        this.addAriaAndCollapsedClass(getTargetFromTrigger($element), $element)
-      }, this))
-      .end()
-  }
-
-  Collapse.prototype.addAriaAndCollapsedClass = function ($element, $trigger) {
-    var isOpen = $element.hasClass('in')
-
-    $element.attr('aria-expanded', isOpen)
-    $trigger
-      .toggleClass('collapsed', !isOpen)
-      .attr('aria-expanded', isOpen)
-  }
-
-  function getTargetFromTrigger($trigger) {
-    var href
-    var target = $trigger.attr('data-target')
-      || (href = $trigger.attr('href')) && href.replace(/.*(?=#[^\s]+$)/, '') // strip for ie7
-
-    return $(target)
-  }
-
-
-  // COLLAPSE PLUGIN DEFINITION
-  // ==========================
-
-  function Plugin(option) {
-    return this.each(function () {
-      var $this = $(this)
-      var data = $this.data('bs.collapse')
-      var options = $.extend({}, Collapse.DEFAULTS, $this.data(), typeof option == 'object' && option)
-
-      if (!data && options.toggle && /show|hide/.test(option)) options.toggle = false
-      if (!data) $this.data('bs.collapse', (data = new Collapse(this, options)))
-      if (typeof option == 'string') data[option]()
-    })
-  }
-
-  var old = $.fn.collapse
-
-  $.fn.collapse = Plugin
-  $.fn.collapse.Constructor = Collapse
-
-
-  // COLLAPSE NO CONFLICT
-  // ====================
-
-  $.fn.collapse.noConflict = function () {
-    $.fn.collapse = old
-    return this
-  }
-
-
-  // COLLAPSE DATA-API
-  // =================
-
-  $(document).on('click.bs.collapse.data-api', '[data-toggle="collapse"]', function (e) {
-    var $this = $(this)
-
-    if (!$this.attr('data-target')) e.preventDefault()
-
-    var $target = getTargetFromTrigger($this)
-    var data = $target.data('bs.collapse')
-    var option = data ? 'toggle' : $this.data()
-
-    Plugin.call($target, option)
-  })
-
-}(jQuery);
-
-/* ========================================================================
- * Bootstrap: dropdown.js v3.3.4
- * http://getbootstrap.com/javascript/#dropdowns
- * ========================================================================
- * Copyright 2011-2015 Twitter, Inc.
- * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
- * ======================================================================== */
-
-
-+function ($) {
-  'use strict';
-
-  // DROPDOWN CLASS DEFINITION
-  // =========================
-
-  var backdrop = '.dropdown-backdrop'
-  var toggle = '[data-toggle="dropdown"]'
-  var Dropdown = function (element) {
-    $(element).on('click.bs.dropdown', this.toggle)
-  }
-
-  Dropdown.VERSION = '3.3.4'
-
-  Dropdown.prototype.toggle = function (e) {
-    var $this = $(this)
-
-    if ($this.is('.disabled, :disabled')) return
-
-    var $parent = getParent($this)
-    var isActive = $parent.hasClass('open')
-
-    clearMenus()
-
-    if (!isActive) {
-      if ('ontouchstart' in document.documentElement && !$parent.closest('.navbar-nav').length) {
-        // if mobile we use a backdrop because click events don't delegate
-        $('