diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 37793a36f8..7fb10c407c 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -137,6 +137,11 @@ def binary? elsif encoding.nil? true + # If Charlock returns an ultra-rare encoding which cannot be converted + # to UTF-8. Probably a false positive, and unrenderable otherwise anyway. + elsif ''.respond_to?(:encode!) and not Encoding.name_list.include?(encoding) + true + # If Charlock says its binary else detect_encoding[:type] == :binary @@ -233,6 +238,36 @@ def vendored? name =~ VendoredRegexp ? true : false end + # Internal: Explicitly remove invalid UTF-8 sequences by conversion. + # + # Avoid throwing an error on invalid byte sequences in UTF-8. + # Unfortunately, converting to and from the same encoding is a no-op, + # so if the data is already UTF-8, convert to UTF-16, then back. + # + # Only affects Ruby 1.9+ since 1.8 is charset naive. + # + # Returns the data blob with invalid characters replaced with \uFFFD if needed. + def _safe_data + if viewable? && data + if ''.respond_to?(:encode!) and not encoding.nil? + if encoding == 'UTF-8' + safe_utf16 = Encoding::Converter.new('UTF-8', 'UTF-16BE', + :invalid => :replace, :undefined => :replace) + convert_encoding = 'UTF-16BE' + convert_data = safe_utf16.convert(data) + else + convert_encoding = encoding + convert_data = data + end + safe_utf8 = Encoding::Converter.new(convert_encoding, 'UTF-8', + :invalid => :replace, :undefined => :replace) + safe_utf8.convert(convert_data) + else + data + end + end + end + # Public: Get each line of data # # Requires Blob#data @@ -241,7 +276,7 @@ def vendored? def lines @lines ||= if viewable? && data - data.split(/\r\n|\r|\n/, -1) + _safe_data.split(/\r\n|\r|\n/, -1) else [] end @@ -274,7 +309,7 @@ def sloc # # Return true or false def generated? - @_generated ||= Generated.generated?(name, lambda { data }) + @_generated ||= Generated.generated?(name, lambda { _safe_data }) end # Public: Detects the Language of the blob. diff --git a/lib/linguist/samples.json b/lib/linguist/samples.json index 7935bd56c9..d1b9c75670 100644 --- a/lib/linguist/samples.json +++ b/lib/linguist/samples.json @@ -511,8 +511,8 @@ ".gemrc" ] }, - "tokens_total": 436395, - "languages_total": 507, + "tokens_total": 436487, + "languages_total": 510, "tokens": { "ABAP": { "*/**": 1, @@ -18967,10 +18967,10 @@ }, "JavaScript": { "function": 1210, - "(": 8513, - ")": 8521, + "(": 8518, + ")": 8528, "{": 2736, - ";": 4052, + ";": 4054, "//": 410, "jshint": 1, "_": 9, @@ -18990,9 +18990,9 @@ "constructor": 8, "toggle": 10, "return": 944, - "[": 1459, + "[": 1473, "this.isShown": 3, - "]": 1456, + "]": 1470, "show": 10, "that": 33, "e": 663, @@ -19020,7 +19020,7 @@ "hide": 8, "body": 22, "modal": 4, - "-": 705, + "-": 707, "open": 2, "fade": 4, "hidden": 12, @@ -19067,7 +19067,7 @@ "Animal.prototype.move": 2, "meters": 4, "alert": 11, - "+": 1135, + "+": 1137, "Snake.__super__.constructor.apply": 2, "arguments": 83, "Snake.prototype.move": 2, @@ -19129,7 +19129,7 @@ "info.versionMinor": 2, "parser.incoming.httpVersion": 1, "parser.incoming.url": 1, - "n": 874, + "n": 875, "headers.length": 2, "parser.maxHeaderPairs": 4, "Math.min": 5, @@ -19213,7 +19213,7 @@ "this.socket": 10, "this.connection": 8, "this.httpVersion": 1, - "null": 427, + "null": 429, "this.complete": 2, "this.headers": 2, "this.trailers": 2, @@ -19300,7 +19300,7 @@ "this.connection.writable": 3, "this.output.length": 5, "this._buffer": 2, - "c": 775, + "c": 776, "this.output.shift": 2, "this.outputEncodings.shift": 2, "this.connection.write": 4, @@ -19713,7 +19713,7 @@ ".type": 2, "c.event.handle.apply": 1, "oa": 1, - "r": 261, + "r": 262, "c.data": 12, "a.liveFired": 4, "i.live": 1, @@ -19747,7 +19747,7 @@ "j.handleObj.origHandler.apply": 1, "pa": 1, "b.replace": 3, - "/": 290, + "/": 297, "./g": 2, ".replace": 38, "/g": 37, @@ -19791,7 +19791,7 @@ "T": 4, "Ta": 1, "<[\\w\\W]+>": 4, - "|": 206, + "|": 212, "#": 13, "Ua": 1, ".": 91, @@ -20071,7 +20071,7 @@ "this.queue": 4, "clearQueue": 2, "Aa": 3, - "t": 436, + "t": 437, "ca": 6, "Za": 2, "r/g": 2, @@ -20081,7 +20081,7 @@ "ab": 1, "button": 24, "input": 25, - "/i": 22, + "/i": 23, "bb": 2, "select": 20, "textarea": 8, @@ -22786,7 +22786,7 @@ "u17b5": 1, "u200c": 1, "u200f": 1, - "u2028": 3, + "u2028": 5, "u202f": 1, "u2060": 1, "u206f": 1, @@ -22986,6 +22986,19 @@ "lt": 55, "#x27": 1, "#x2F": 1, + "PR.registerLangHandler": 1, + "PR.createSimpleLexer": 1, + "xa0": 2, + "u2029": 4, + "u201c": 5, + "u201d": 5, + "kwd": 1, + "com": 1, + "lit": 1, + "pln": 1, + "pun": 1, + "u2018": 1, + "u2019": 1, "window.Modernizr": 1, "Modernizr": 12, "enableClasses": 3, @@ -23278,7 +23291,6 @@ "result0.push": 1, "parse_singleLineComment": 2, "parse_multiLineComment": 2, - "u2029": 2, "x0B": 1, "uFEFF": 1, "u1680": 1, @@ -24976,7 +24988,8 @@ "exports.OPERATORS": 1, "exports.is_alphanumeric_char": 1, "exports.set_logger": 1, - "logger": 2 + "logger": 2, + "assertEq": 1 }, "JSON": { "{": 73, @@ -46374,7 +46387,7 @@ "Ioke": 2, "Jade": 3, "Java": 8987, - "JavaScript": 76934, + "JavaScript": 77026, "JSON": 183, "JSON5": 57, "Julia": 247, @@ -46510,7 +46523,7 @@ "Ioke": 1, "Jade": 1, "Java": 6, - "JavaScript": 20, + "JavaScript": 22, "JSON": 4, "JSON5": 2, "Julia": 1, @@ -46558,7 +46571,7 @@ "Processing": 1, "Prolog": 6, "Protocol Buffer": 1, - "Python": 7, + "Python": 8, "R": 2, "Racket": 2, "Ragel in Ruby Host": 3, @@ -46600,5 +46613,5 @@ "Xtend": 2, "YAML": 1 }, - "md5": "7ab5683c610f7e81d6ea5fb470111bbe" + "md5": "12d1d4ad42ef152a4a4a69da5aa9ddd0" } \ No newline at end of file diff --git a/lib/linguist/samples.rb b/lib/linguist/samples.rb index 2bd5212eb2..0c94769cac 100644 --- a/lib/linguist/samples.rb +++ b/lib/linguist/samples.rb @@ -101,7 +101,10 @@ def self.data db['filenames'][language_name].sort! end - data = File.read(sample[:path]) + # Avoid throwing an error on invalid byte sequences. Encoding to and from the same + # charset is a no-op, so read in as UTF-16, then to convert to UTF-8. Not an issue in Ruby 1.8. + data = ''.respond_to?(:encode!) ? File.read(sample[:path]).encode('UTF-16BE', :invalid => :replace, + :undefined => :replace).encode('UTF-8') : File.read(sample[:path]) Classifier.train!(db, language_name, data) end @@ -114,7 +117,8 @@ def self.data # Used to retrieve the interpreter from the shebang line of a file's # data. def self.interpreter_from_shebang(data) - lines = data.lines.to_a + lines = ''.respond_to?(:encode!) ? data.encode('UTF-16BE', :invalid => :replace, + :undefined => :replace).encode('UTF-8').lines.to_a : data.lines.to_a if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/ bang.sub!(/^#! /, '#!') diff --git a/lib/linguist/tokenizer.rb b/lib/linguist/tokenizer.rb index 4b2ea60741..f4f5686b9b 100644 --- a/lib/linguist/tokenizer.rb +++ b/lib/linguist/tokenizer.rb @@ -55,7 +55,8 @@ def self.tokenize(data) # # Returns Array of token Strings. def extract_tokens(data) - s = StringScanner.new(data) + s = ''.respond_to?(:encode!) ? StringScanner.new(data.encode('UTF-16BE', + :invalid => :replace,:undefined => :replace).encode('UTF-8')) : StringScanner.new(data) tokens = [] until s.eos? diff --git a/samples/JavaScript/lang-vb.js b/samples/JavaScript/lang-vb.js new file mode 100644 index 0000000000..07506b03cd --- /dev/null +++ b/samples/JavaScript/lang-vb.js @@ -0,0 +1,2 @@ +PR.registerLangHandler(PR.createSimpleLexer([["pln",/^[\t\n\r \xa0\u2028\u2029]+/,null,"\t\n\r \xa0

"],["str",/^(?:["\u201c\u201d](?:[^"\u201c\u201d]|["\u201c\u201d]{2})(?:["\u201c\u201d]c|$)|["\u201c\u201d](?:[^"\u201c\u201d]|["\u201c\u201d]{2})*(?:["\u201c\u201d]|$))/i,null,'"“”'],["com",/^['\u2018\u2019].*/,null,"'‘’"]],[["kwd",/^(?:addhandler|addressof|alias|and|andalso|ansi|as|assembly|auto|boolean|byref|byte|byval|call|case|catch|cbool|cbyte|cchar|cdate|cdbl|cdec|char|cint|class|clng|cobj|const|cshort|csng|cstr|ctype|date|decimal|declare|default|delegate|dim|directcast|do|double|each|else|elseif|end|endif|enum|erase|error|event|exit|finally|for|friend|function|get|gettype|gosub|goto|handles|if|implements|imports|in|inherits|integer|interface|is|let|lib|like|long|loop|me|mod|module|mustinherit|mustoverride|mybase|myclass|namespace|new|next|not|notinheritable|notoverridable|object|on|option|optional|or|orelse|overloads|overridable|overrides|paramarray|preserve|private|property|protected|public|raiseevent|readonly|redim|removehandler|resume|return|select|set|shadows|shared|short|single|static|step|stop|string|structure|sub|synclock|then|throw|to|try|typeof|unicode|until|variant|wend|when|while|with|withevents|writeonly|xor|endif|gosub|let|variant|wend)\b/i, +null],["com",/^rem.*/i],["lit",/^(?:true\b|false\b|nothing\b|\d+(?:e[+-]?\d+[dfr]?|[dfilrs])?|(?:&h[\da-f]+|&o[0-7]+)[ils]?|\d*\.\d+(?:e[+-]?\d+)?[dfr]?|#\s+(?:\d+[/-]\d+[/-]\d+(?:\s+\d+:\d+(?::\d+)?(\s*(?:am|pm))?)?|\d+:\d+(?::\d+)?(\s*(?:am|pm))?)\s+#)/i],["pln",/^(?:(?:[a-z]|_\w)\w*|\[(?:[a-z]|_\w)\w*])/i],["pun",/^[^\w\t\n\r "'[\]\xa0\u2018\u2019\u201c\u201d\u2028\u2029]+/],["pun",/^(?:\[|])/]]),["vb","vbs"]); diff --git a/samples/JavaScript/xor-sanity.js b/samples/JavaScript/xor-sanity.js new file mode 100644 index 0000000000..4f41e0fcec --- /dev/null +++ b/samples/JavaScript/xor-sanity.js @@ -0,0 +1 @@ +assertEq(-2^31, -31); diff --git a/samples/Python/shtest-encoding.py b/samples/Python/shtest-encoding.py new file mode 100644 index 0000000000..dfc987f6df --- /dev/null +++ b/samples/Python/shtest-encoding.py @@ -0,0 +1,3 @@ +# RUN: true + +# Here is a string that cannot be decoded in line mode: . diff --git a/samples/Text/btParallelConstraintSolver.h b/samples/Text/btParallelConstraintSolver.h new file mode 100644 index 0000000000..7c0268e7f8 --- /dev/null +++ b/samples/Text/btParallelConstraintSolver.h @@ -0,0 +1,285 @@ +/* + Copyright (C) 2010 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef __BT_PARALLEL_CONSTRAINT_SOLVER_H +#define __BT_PARALLEL_CONSTRAINT_SOLVER_H + +#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h" + + + + +#include "LinearMath/btScalar.h" +#include "PlatformDefinitions.h" + + +#define PFX_MAX_SOLVER_PHASES 64 +#define PFX_MAX_SOLVER_BATCHES 16 +#define PFX_MAX_SOLVER_PAIRS 128 +#define PFX_MIN_SOLVER_PAIRS 16 + +#ifdef __CELLOS_LV2__ +ATTRIBUTE_ALIGNED128(struct) PfxParallelBatch { +#else +ATTRIBUTE_ALIGNED16(struct) PfxParallelBatch { +#endif + uint16_t pairIndices[PFX_MAX_SOLVER_PAIRS]; +}; + +#ifdef __CELLOS_LV2__ +ATTRIBUTE_ALIGNED128(struct) PfxParallelGroup { +#else +ATTRIBUTE_ALIGNED16(struct) PfxParallelGroup { +#endif + uint16_t numPhases; + uint16_t numBatches[PFX_MAX_SOLVER_PHASES]; + uint16_t numPairs[PFX_MAX_SOLVER_PHASES*PFX_MAX_SOLVER_BATCHES]; +}; + + + +ATTRIBUTE_ALIGNED16(struct) PfxSortData16 { + union { + uint8_t i8data[16]; + uint16_t i16data[8]; + uint32_t i32data[4]; +#ifdef __SPU__ + vec_uint4 vdata; +#endif + }; + +#ifdef __SPU__ + void set8(int elem,uint8_t data) {vdata=(vec_uint4)spu_insert(data,(vec_uchar16)vdata,elem);} + void set16(int elem,uint16_t data) {vdata=(vec_uint4)spu_insert(data,(vec_ushort8)vdata,elem);} + void set32(int elem,uint32_t data) {vdata=(vec_uint4)spu_insert(data,(vec_uint4)vdata,elem);} + uint8_t get8(int elem) const {return spu_extract((vec_uchar16)vdata,elem);} + uint16_t get16(int elem) const {return spu_extract((vec_ushort8)vdata,elem);} + uint32_t get32(int elem) const {return spu_extract((vec_uint4)vdata,elem);} +#else + void set8(int elem,uint8_t data) {i8data[elem] = data;} + void set16(int elem,uint16_t data) {i16data[elem] = data;} + void set32(int elem,uint32_t data) {i32data[elem] = data;} + uint8_t get8(int elem) const {return i8data[elem];} + uint16_t get16(int elem) const {return i16data[elem];} + uint32_t get32(int elem) const {return i32data[elem];} +#endif +}; + +typedef PfxSortData16 PfxConstraintPair; + + +//J PfxBroadphasePairƋ + +SIMD_FORCE_INLINE void pfxSetConstraintId(PfxConstraintPair &pair,uint32_t i) {pair.set32(2,i);} +SIMD_FORCE_INLINE void pfxSetNumConstraints(PfxConstraintPair &pair,uint8_t n) {pair.set8(7,n);} + +SIMD_FORCE_INLINE uint32_t pfxGetConstraintId1(const PfxConstraintPair &pair) {return pair.get32(2);} +SIMD_FORCE_INLINE uint8_t pfxGetNumConstraints(const PfxConstraintPair &pair) {return pair.get8(7);} + +typedef PfxSortData16 PfxBroadphasePair; + +SIMD_FORCE_INLINE void pfxSetRigidBodyIdA(PfxBroadphasePair &pair,uint16_t i) {pair.set16(0,i);} +SIMD_FORCE_INLINE void pfxSetRigidBodyIdB(PfxBroadphasePair &pair,uint16_t i) {pair.set16(1,i);} +SIMD_FORCE_INLINE void pfxSetMotionMaskA(PfxBroadphasePair &pair,uint8_t i) {pair.set8(4,i);} +SIMD_FORCE_INLINE void pfxSetMotionMaskB(PfxBroadphasePair &pair,uint8_t i) {pair.set8(5,i);} +SIMD_FORCE_INLINE void pfxSetBroadphaseFlag(PfxBroadphasePair &pair,uint8_t f) {pair.set8(6,(pair.get8(6)&0xf0)|(f&0x0f));} +SIMD_FORCE_INLINE void pfxSetActive(PfxBroadphasePair &pair,bool b) {pair.set8(6,(pair.get8(6)&0x0f)|((b?1:0)<<4));} +SIMD_FORCE_INLINE void pfxSetContactId(PfxBroadphasePair &pair,uint32_t i) {pair.set32(2,i);} + +SIMD_FORCE_INLINE uint16_t pfxGetRigidBodyIdA(const PfxBroadphasePair &pair) {return pair.get16(0);} +SIMD_FORCE_INLINE uint16_t pfxGetRigidBodyIdB(const PfxBroadphasePair &pair) {return pair.get16(1);} +SIMD_FORCE_INLINE uint8_t pfxGetMotionMaskA(const PfxBroadphasePair &pair) {return pair.get8(4);} +SIMD_FORCE_INLINE uint8_t pfxGetMotionMaskB(const PfxBroadphasePair &pair) {return pair.get8(5);} +SIMD_FORCE_INLINE uint8_t pfxGetBroadphaseFlag(const PfxBroadphasePair &pair) {return pair.get8(6)&0x0f;} +SIMD_FORCE_INLINE bool pfxGetActive(const PfxBroadphasePair &pair) {return (pair.get8(6)>>4)!=0;} +SIMD_FORCE_INLINE uint32_t pfxGetContactId1(const PfxBroadphasePair &pair) {return pair.get32(2);} + + + +#if defined(__PPU__) || defined (__SPU__) +ATTRIBUTE_ALIGNED128(struct) PfxSolverBody { +#else +ATTRIBUTE_ALIGNED16(struct) PfxSolverBody { +#endif + vmVector3 mDeltaLinearVelocity; + vmVector3 mDeltaAngularVelocity; + vmMatrix3 mInertiaInv; + vmQuat mOrientation; + float mMassInv; + float friction; + float restitution; + float unused; + float unused2; + float unused3; + float unused4; + float unused5; +}; + + +#ifdef __PPU__ +#include "SpuDispatch/BulletPE2ConstraintSolverSpursSupport.h" +#endif + +static SIMD_FORCE_INLINE vmVector3 btReadVector3(const double* p) +{ + float tmp[3] = {float(p[0]),float(p[1]),float(p[2])}; + vmVector3 v; + loadXYZ(v, tmp); + return v; +} + +static SIMD_FORCE_INLINE vmQuat btReadQuat(const double* p) +{ + float tmp[4] = {float(p[0]),float(p[1]),float(p[2]),float(p[4])}; + vmQuat vq; + loadXYZW(vq, tmp); + return vq; +} + +static SIMD_FORCE_INLINE void btStoreVector3(const vmVector3 &src, double* p) +{ + float tmp[3]; + vmVector3 v = src; + storeXYZ(v, tmp); + p[0] = tmp[0]; + p[1] = tmp[1]; + p[2] = tmp[2]; +} + + +static SIMD_FORCE_INLINE vmVector3 btReadVector3(const float* p) +{ + vmVector3 v; + loadXYZ(v, p); + return v; +} + +static SIMD_FORCE_INLINE vmQuat btReadQuat(const float* p) +{ + vmQuat vq; + loadXYZW(vq, p); + return vq; +} + +static SIMD_FORCE_INLINE void btStoreVector3(const vmVector3 &src, float* p) +{ + vmVector3 v = src; + storeXYZ(v, p); +} + + + + +class btPersistentManifold; + +enum { + PFX_CONSTRAINT_SOLVER_CMD_SETUP_SOLVER_BODIES, + PFX_CONSTRAINT_SOLVER_CMD_SETUP_CONTACT_CONSTRAINTS, + PFX_CONSTRAINT_SOLVER_CMD_SETUP_JOINT_CONSTRAINTS, + PFX_CONSTRAINT_SOLVER_CMD_SOLVE_CONSTRAINTS, + PFX_CONSTRAINT_SOLVER_CMD_POST_SOLVER +}; + + +struct PfxSetupContactConstraintsIO { + PfxConstraintPair *offsetContactPairs; + uint32_t numContactPairs1; + btPersistentManifold* offsetContactManifolds; + class TrbState *offsetRigStates; + struct PfxSolverBody *offsetSolverBodies; + uint32_t numRigidBodies; + float separateBias; + float timeStep; + class btCriticalSection* criticalSection; +}; + + + +struct PfxSolveConstraintsIO { + PfxParallelGroup *contactParallelGroup; + PfxParallelBatch *contactParallelBatches; + PfxConstraintPair *contactPairs; + uint32_t numContactPairs; + btPersistentManifold *offsetContactManifolds; + PfxParallelGroup *jointParallelGroup; + PfxParallelBatch *jointParallelBatches; + PfxConstraintPair *jointPairs; + uint32_t numJointPairs; + struct btSolverConstraint* offsetSolverConstraints; + TrbState *offsetRigStates1; + PfxSolverBody *offsetSolverBodies; + uint32_t numRigidBodies; + uint32_t iteration; + + uint32_t taskId; + + class btBarrier* barrier; + +}; + +struct PfxPostSolverIO { + TrbState *states; + PfxSolverBody *solverBodies; + uint32_t numRigidBodies; +}; + +ATTRIBUTE_ALIGNED16(struct) btConstraintSolverIO { + uint8_t cmd; + union { + PfxSetupContactConstraintsIO setupContactConstraints; + PfxSolveConstraintsIO solveConstraints; + PfxPostSolverIO postSolver; + }; + + //SPU only + uint32_t barrierAddr2; + uint32_t criticalsectionAddr2; + uint32_t maxTasks1; +}; + + + + +void SolverThreadFunc(void* userPtr,void* lsMemory); +void* SolverlsMemoryFunc(); +///The btParallelConstraintSolver performs computations on constraint rows in parallel +///Using the cross-platform threading it supports Windows, Linux, Mac OSX and PlayStation 3 Cell SPUs +class btParallelConstraintSolver : public btSequentialImpulseConstraintSolver +{ + +protected: + struct btParallelSolverMemoryCache* m_memoryCache; + + class btThreadSupportInterface* m_solverThreadSupport; + + struct btConstraintSolverIO* m_solverIO; + class btBarrier* m_barrier; + class btCriticalSection* m_criticalSection; + + +public: + + btParallelConstraintSolver(class btThreadSupportInterface* solverThreadSupport); + + virtual ~btParallelConstraintSolver(); + + virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher); + +}; + + + +#endif //__BT_PARALLEL_CONSTRAINT_SOLVER_H \ No newline at end of file