fterh · iedr · Oct 31, 2019 · Jan 25, 2020 · Jan 25, 2020
diff --git a/README.md b/README.md
@@ -13,6 +13,7 @@ too long.
 * channelnewsasia.com (CNAlifestyle)
 * mothership.sg  
 * ricemedia.co
+* scmp.com
 * straitstimes.com
 * tnp.sg
 * todayonline.com

diff --git a/config.py b/config.py
@@ -13,7 +13,7 @@
 }
 
 BOT = {
-    "VERSION": "1.0.2",
+    "VERSION": "1.0.3",
     "REPO_LINK": "https://github.com/fterh/sneakpeek",
     "CONTRIBUTE_LINK": "https://github.com/fterh/sneakpeek"
 }

diff --git a/handlers/scmp_handler/__init__.py b/handlers/scmp_handler/__init__.py
@@ -0,0 +1,63 @@
+"""Handler for South China Morning Post."""
+
+import requests
+from bs4 import BeautifulSoup
+from comment import Comment
+from itertools import chain
+from handlers.abstract_base_handler import AbstractBaseHandler, HandlerError
+
+def get_text_from_children(children_element):
+    """Concatenate the text found in a list of SCMP JSON content nodes."""
+    fragments = []
+    for node in children_element:
+        node_type = node['type']
+        if node_type == "text":
+            fragments.append(node['data'])
+        elif node_type in ("a", "em"):  # inline wrappers: recurse into them
+            fragments.append(get_text_from_children(node['children']))
+    return "".join(fragments)
+
+class ScmpHandler(AbstractBaseHandler):
+    """Handler for South China Morning Post (scmp.com) articles."""
+
+    @classmethod
+    def handle(cls, url):
+        """Fetch url and return a Comment with the title, subheadlines and body."""
+        # Local import: json.loads is used below but json is not a top-level import here.
+        import json
+
+        html = requests.get(url).text
+        soup = BeautifulSoup(html, "html.parser")
+
+        # The article data is embedded as JSON assigned to window.__APOLLO_STATE__.
+        article_script = [script.text for script in soup.find_all("script") if script is not None and "window.__APOLLO_STATE__" in script.text.strip()]
+        json_article = json.loads(article_script[0][ article_script[0].find("=")+1: ])
+        json_content = json_article['contentService']
+
+        content_key = [k for k in json_content.keys() if k.startswith("$ROOT_QUERY.content")][0]
+        title = json_content[content_key]['headline']
+
+        # Subheadline text sits either directly in a node or one level down in its children.
+        subheadlines_element = json_content[content_key]['subHeadline']['json'][0]['children']
+        subheadlines = []
+        for elem in subheadlines_element:
+            if elem['type'] == "text":
+                subheadlines.append(elem['data'])
+            else:
+                for elem_child in elem['children']:
+                    if elem_child['type'] == "text":
+                        subheadlines.append(elem_child['data'])
+
+        subheadlines_md = "\n\n".join(["* " + subheadline.strip() for subheadline in subheadlines if subheadline.strip() != ""])
+
+        body_key = [k for k in json_content[content_key].keys() if k.startswith("body")][0]
+        body_list = []
+        for j in json_content[content_key][body_key]['json']:
+            # Only paragraph nodes that carry children contribute to the body text.
+            if "children" not in j.keys() or j['type'] != "p":
+                continue
+            body_list.append(get_text_from_children(j['children']))
+
+        body_list = [b.strip() for b in body_list if b.strip() != ""]
+        body = subheadlines_md + "\n\n" + "\n\n".join(body_list)
+        return Comment(title, body.strip())