Skip to content

Commit

Permalink
Remove "type" checking when parsing <script> tags.
Browse files (browse the repository at this point in the history)
  • Loading branch information
Chun-Hung Hsiao committed Feb 14, 2015
1 parent 9bceefc commit dfacde7
Showing 1 changed file with 17 additions and 19 deletions.
36 changes: 17 additions & 19 deletions scripts/extract_js.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,6 @@
import os
import redis
from slimit.parser import Parser as JSParser
import sqlite3
import sys
from time import gmtime
from time import strftime
Expand Down Expand Up @@ -36,24 +35,23 @@ def handle_starttag(self, tag, attrs):
if tag == 'script':
self.pos = self.getpos()
attrs = dict(attrs)
if attrs.get('type', None) == 'text/javascript':
src = attrs.get('src', '').strip()
if src:
url = urljoin(self.url, src)
path = self._getpath(url)
if self._addpath(path):
try:
urlres = urlopen(url)
if urlres.getcode() != 200:
raise IOError
script = urlres.read().strip()
if script.startswith('<!--') and script.endswith('-->'):
script = script[4:-3].strip()
if script:
date = strftime('%a, %d %b %Y %H:%M:%S GMT', gmtime())
self._addscript(path, date, url, script)
except IOError:
print >> sys.stderr, 'Cannot retrieve ' + url
src = attrs.get('src', '').strip()
if src:
url = urljoin(self.url, src)
path = self._getpath(url)
if self._addpath(path):
try:
urlres = urlopen(url)
if urlres.getcode() != 200:
raise IOError
script = urlres.read().strip()
if script.startswith('<!--') and script.endswith('-->'):
script = script[4:-3].strip()
if script:
date = strftime('%a, %d %b %Y %H:%M:%S GMT', gmtime())
self._addscript(path, date, url, script)
except IOError:
print >> sys.stderr, 'Cannot retrieve ' + url

def handle_data(self, data):
if self.pos:
Expand Down

0 comments on commit dfacde7

Please sign in to comment.