fossology · Kaushl2208 · Oct 6, 2021 · Apr 28, 2021 · Apr 28, 2021 · Apr 28, 2021
diff --git a/nirjas/__init__.py b/nirjas/__init__.py
@@ -1,6 +1,32 @@
+#!/usr/bin/env python3
+
+'''
+nirjas module which can be imported by other tools
+
+SPDX-License-Identifier: LGPL-2.1
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+'''
+
 from nirjas.main import *
 
 def extract(file):
+    '''
+    Extract the information from the given file by delegating to file_runner.
+    :param file: File or directory to get information from
+    :type file: string
+    :return: Returns comments and other meta information about the given file.
+    '''
     return file_runner(file)
 
-__all__ = ['file_runner','extract', 'langIdentifier']
+__all__ = ['file_runner', 'extract', 'LanguageMapper']
diff --git a/nirjas/binder.py b/nirjas/binder.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 '''
-Copyright (C) 2020  Ayush Bhardwaj (classicayush@gmail.com), Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com)
+Copyright (C) 2020  Ayush Bhardwaj (classicayush@gmail.com),
+Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com)
 
 SPDX-License-Identifier: LGPL-2.1
 
@@ -25,7 +26,7 @@
 from operator import itemgetter
 
 
-def readSingleLine(file, regex, sign):
+def readSingleLine(file, regex):
     '''
     Read file line by line and match the given regex to get comment.
     Return comments, lines read, blank lines, and lines with comments.
@@ -60,7 +61,7 @@ def contSingleLines(data):
     for i in data[0]:
         lines.append(i[0])
 
-    for a, b in groupby(enumerate(lines), lambda x: x[0] - x[1]):
+    for _, b in groupby(enumerate(lines), lambda x: x[0] - x[1]):
         temp = list(map(itemgetter(1), b))
         content = ""
 
@@ -69,7 +70,9 @@ def contSingleLines(data):
             end_line.append(temp[-1])
             for i in temp:
                 comment = [x[1] for x in data[0] if x[0] == i]
-                [data[0].remove(x) for x in data[0] if x[0] == i]
+                # Rebuild in place: deleting while enumerating skips items.
+                data[0][:] = [x for x in data[0]
+                              if x[0] != i]
                 content = content + ' ' + comment[0]
             output.append(content)
     return data, start_line, end_line, output
@@ -79,29 +82,38 @@ def readMultiLineSame(file, syntax: str):
     '''
     Read multiline comments where starting and ending symbols are same.
     '''
-    lines, output, start_line, end_line = [], [], [], []
+    start_line, end_line, output = [], [], []
     content = ""
+    # Delimiter preceded by the opposite quote means it sits inside a string
+    # literal; always bind the name so quote-free syntax cannot raise.
+    opposite_quote = "'" if '"' in syntax else '"'
+    syntax_in_string = opposite_quote + syntax
     closingCount, lines_of_comment = 0, 0
     copy = False
     with open(file) as f:
         for line_number, line in enumerate(f, start=1):
-            if syntax in line:
+            if syntax in line and \
+                    syntax_in_string not in line:
                 closingCount += 1
                 copy = True
+                if line.count(syntax) == 2:
+                    # Start and end on same line
+                    closingCount = 2
+                    content = line.replace('\n', ' ')
+                    start_line.append(line_number)
                 if closingCount % 2 == 0 and closingCount != 0:
                     copy = False
                     output.append(content.strip())
                     content = ""
                     end_line.append(line_number)
-                lines.append(line_number)
+                else:
+                    start_line.append(line_number)
 
             if copy:
                 lines_of_comment += 1
                 content = content + line.replace('\n', ' ')
 
-            output = [s.strip(syntax).strip() for s in output]
-
-        start_line = list(filter(lambda x: x not in end_line, lines))
+    output = [s.strip(syntax).strip() for s in output]
     return start_line, end_line, output, lines_of_comment
 
 
@@ -133,71 +145,85 @@ def readMultiLineDiff(file, startSyntax: str, endSyntax: str):
                 content = content + (line.replace('\n', ' ')).strip()
             if line.strip() == '':
                 blank_lines += 1
-        for idx, i in enumerate(endLine):
+        for idx, _ in enumerate(endLine):
             line_of_comments = line_of_comments + (endLine[idx] - startLine[idx]) + 1
         line_of_comments += len(output)
         output = [s.strip(startSyntax).strip(endSyntax).strip() for s in output]
     return startLine, endLine, output, line_of_comments, total_lines, blank_lines
 
 
 class CommentSyntax:
-
+    '''
+    Class to hold various regex and helper functions based on comment format
+    used by a language.
+    '''
     def __init__(self):
-        pass
+        self.sign = None
+        self.pattern = None
+        self.start = None
+        self.end = None
 
     def hash(self, file):
         '''
         sign: #
         '''
         self.sign = '#'
-        self.pattern_hash = r'''#+\s*(.*)'''
-        return readSingleLine(file, self.pattern_hash, self.sign)
+        self.pattern = r'''(?<!["'`])#+\s*(.*)'''
+        return readSingleLine(file, self.pattern)
+
+    def hashNoCurl(self, file):
+        '''
+        sign: #
+        '''
+        self.sign = '#'
+        self.pattern = r'''(?<!["'`])#+(?!\{)\s*(.*)'''
+        return readSingleLine(file, self.pattern)
 
     def percentage(self, file):
         '''
         sign: %
         '''
         self.sign = '%'
-        self.pattern_percentage = r'''\%\s*(.*)'''
-        return readSingleLine(file, self.pattern_percentage, self.sign)
+        self.pattern = r'''(?<!["'`])\%\s*(.*)'''
+        return readSingleLine(file, self.pattern)
 
     def doubleSlash(self, file):
         '''
         sign: //
         '''
         self.sign = '//'
-        self.pattern_doubleSlash = r'''(?<![pst]:)\/\/\s*(.*)'''
-        return readSingleLine(file, self.pattern_doubleSlash, self.sign)
+        self.pattern = r'''(?<![pst'"`]:)\/\/\s*(.*)'''
+        return readSingleLine(file, self.pattern)
 
     def doubleNotTripleSlash(self, file):
         '''
         sign: //
         '''
         self.sign = '//'
-        self.pattern_doubleNotTripleSlash = r'''(?<!\/)\/\/(?!\/)\s*(.*)'''
-        return readSingleLine(file, self.pattern_doubleNotTripleSlash, self.sign)
+        self.pattern = r'''(?<!\/)\/\/(?!\/)\s*(.*)'''
+        return readSingleLine(file, self.pattern)
 
     def singleQuotes(self, file):
         """
         sign: '''  '''
         """
-        self.syntax = "'''"
-        return readMultiLineSame(file, self.syntax)
+        self.sign = "'''"
+        return readMultiLineSame(file, self.sign)
 
     def doubleQuotes(self, file):
         '''
         sign: """ """
         '''
-        self.syntax = '"""'
-        return readMultiLineSame(file, self.syntax)
+        self.sign = '"""'
+        return readMultiLineSame(file, self.sign)
 
     def doubleDash(self, file):
         '''
         sign: --
         '''
         self.sign = '--'
-        self.pattern_doubleDash = r'''\-\-\s*(.*)'''
-        return readSingleLine(file, self.pattern_doubleDash, self.sign)
+        self.pattern = r'''(?<!["'`])\-\-\s*(.*)'''
+        return readSingleLine(file, self.pattern)
 
     def slashStar(self, file):
         '''
@@ -252,8 +278,8 @@ def tripleSlash(self, file):
         sign: ///
         '''
         self.sign = '///'
-        self.pattern_tripleSlash = r'''\/\/\/\s*(.*)'''
-        return readSingleLine(file, self.pattern_tripleSlash, self.sign)
+        self.pattern = r'''(?<!["'`])\/\/\/\s*(.*)'''
+        return readSingleLine(file, self.pattern)
 
     def slashDoubleStar(self, file):
         '''

diff --git a/nirjas/languages/__init__.py b/nirjas/languages/__init__.py
@@ -1 +1,26 @@
-__all__ = ["c", "c_sharp", "cpp", "css", "dart", "go", "haskell", "html", "java", "javascript", "kotlin", "matlab", "perl", "php", "python", "r", "ruby", "rust", "scala", "scss", "shell", "swift", "text", "typescript"]
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+'''
+SPDX-License-Identifier: LGPL-2.1
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+Module holding different language syntax
+'''
+
+__all__ = ["c", "c_sharp", "cpp", "css", "dart", "go", "haskell", "html",
+           "java", "javascript", "kotlin", "matlab", "perl", "php", "python",
+           "r", "ruby", "rust", "scala", "scss", "shell", "swift", "text",
+           "typescript"]
diff --git a/nirjas/languages/c.py b/nirjas/languages/c.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 '''
-Copyright (C) 2020  Ayush Bhardwaj (classicayush@gmail.com), Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com)
+Copyright (C) 2020  Ayush Bhardwaj (classicayush@gmail.com),
+Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com)
 
 SPDX-License-Identifier: LGPL-2.1
 
@@ -20,11 +21,18 @@
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 '''
 
-from nirjas.binder import *
+from nirjas.binder import CommentSyntax, contSingleLines
 from nirjas.output import ScanOutput, SingleLine, MultiLine
 
 
 def cExtractor(file):
+    '''
+    Extract comments from C file.
+    :param file: File to scan
+    :type file: string
+    :return: Scan output
+    :rtype: ScanOutput
+    '''
     result = CommentSyntax()
     single_line_comment = result.doubleSlash(file)
     multiline_comment = result.slashStar(file)
@@ -43,13 +51,13 @@ def cExtractor(file):
     for i in single_line_comment[0]:
         output.single_line_comment.append(SingleLine(i[0], i[1]))
 
-    for idx, i in enumerate(cont_single_line_comment[1]):
+    for idx, _ in enumerate(cont_single_line_comment[1]):
         output.cont_single_line_comment.append(MultiLine(
             cont_single_line_comment[1][idx], cont_single_line_comment[2][idx],
             cont_single_line_comment[3][idx]))
 
     try:
-        for idx, i in enumerate(multiline_comment[0]):
+        for idx, _ in enumerate(multiline_comment[0]):
             output.multi_line_comment.append(MultiLine(multiline_comment[0][idx],
                                                        multiline_comment[1][idx],
                                                        multiline_comment[2][idx]))
@@ -60,6 +68,15 @@ def cExtractor(file):
 
 
 def cSource(file, new_file: str):
+    '''
+    Extract source from C file and put at new_file.
+    :param file: File to process
+    :type file: string
+    :param new_file: File to put source at
+    :type new_file: string
+    :return: Path to new file
+    :rtype: string
+    '''
     copy = True
     with open(new_file, 'w+') as f1:
         with open(file) as f:

diff --git a/nirjas/languages/c_sharp.py b/nirjas/languages/c_sharp.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 '''
-Copyright (C) 2020  Ayush Bhardwaj (classicayush@gmail.com), Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com)
+Copyright (C) 2020  Ayush Bhardwaj (classicayush@gmail.com),
+Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com)
 
 SPDX-License-Identifier: LGPL-2.1
 
@@ -20,11 +21,18 @@
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 '''
 
-from nirjas.binder import *
+from nirjas.binder import CommentSyntax, contSingleLines
 from nirjas.output import ScanOutput, SingleLine, MultiLine
 
 
 def c_sharpExtractor(file):
+    '''
+    Extract comments from C# file.
+    :param file: File to scan
+    :type file: string
+    :return: Scan output
+    :rtype: ScanOutput
+    '''
     result = CommentSyntax()
     single_line_comment = result.doubleSlash(file)
     multiline_comment = result.slashStar(file)
@@ -43,12 +51,12 @@ def c_sharpExtractor(file):
     for i in single_line_comment[0]:
         output.single_line_comment.append(SingleLine(i[0], i[1]))
 
-    for idx, i in enumerate(cont_single_line_comment[1]):
+    for idx, _ in enumerate(cont_single_line_comment[1]):
         output.cont_single_line_comment.append(MultiLine(
             cont_single_line_comment[1][idx], cont_single_line_comment[2][idx],
             cont_single_line_comment[3][idx]))
 
-    for idx, i in enumerate(multiline_comment[0]):
+    for idx, _ in enumerate(multiline_comment[0]):
         output.multi_line_comment.append(MultiLine(multiline_comment[0][idx],
                                                    multiline_comment[1][idx],
                                                    multiline_comment[2][idx]))
@@ -57,6 +65,15 @@ def c_sharpExtractor(file):
 
 
 def c_sharpSource(file, new_file: str):
+    '''
+    Extract source from C# file and put at new_file.
+    :param file: File to process
+    :type file: string
+    :param new_file: File to put source at
+    :type new_file: string
+    :return: Path to new file
+    :rtype: string
+    '''
     copy = True
     with open(new_file, 'w+') as f1:
         with open(file, 'r') as f: