creativecommons · IamMQaisar · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024 · Mar 15, 2024
@@ -17,22 +17,24 @@
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 
-# Set up current working directory
+# Set up current working directory (CWD) and root_path
 CWD = os.path.dirname(os.path.abspath(__file__))
+root_path = os.path.dirname(CWD)
 # Load environment variables
-dotenv_path = os.path.join(os.path.dirname(CWD), ".env")
+dotenv_path = os.path.join(root_path, ".env")
 load_dotenv(dotenv_path)
 
-# Get the current date
+# Get today's date and build the dated output file path inside CWD
 today = dt.datetime.today()
-# Retrieve API keys
-API_KEYS = os.getenv("GOOGLE_API_KEYS").split(",")
-API_KEYS_IND = 0
-# Set up file path for CSV report
 DATA_WRITE_FILE = (
     f"{CWD}" f"/data_deviantart_{today.year}_{today.month}_{today.day}.csv"
 )
-# Retrieve Programmable Search Engine key from environment variables
+
+# Global Variable for API_KEYS indexing
+API_KEYS_IND = 0
+
+# Gets API_KEYS and PSE_KEY from .env file
+API_KEYS = os.getenv("GOOGLE_API_KEYS").split(",")
 PSE_KEY = os.getenv("PSE_KEY")
 
 
@@ -41,11 +43,14 @@ def get_license_list():
     Provides the list of license from 2018's record of Creative Commons.
 
     Returns:
-    - np.array: An array containing all license types that should be
-    searched via Programmable Search Engine.
+    - np.array:
+            An np array containing all license types that should be searched
+            via Programmable Search Engine (PSE).
     """
     # Read license data from file
-    cc_license_data = pd.read_csv(f"{CWD}/legal-tool-paths.txt", header=None)
+    cc_license_data = pd.read_csv(
+        f"{root_path}/legal-tool-paths.txt", header=None
+    )
     # Define regex pattern to extract license types
     license_pattern = r"((?:[^/]+/){2}(?:[^/]+)).*"
     license_list = (
@@ -126,7 +131,7 @@ def get_response_elems(license):
 
 
 def set_up_data_file():
-    """Writes the header row to the file to contain DeviantArt data."""
+    """Write the header row to the file that will hold DeviantArt data."""
     header_title = "LICENSE TYPE,Document Count"
     with open(DATA_WRITE_FILE, "w") as f:
         f.write(f"{header_title}\n")
@@ -135,9 +140,11 @@ def set_up_data_file():
 def record_license_data(license_type):
     """Writes the row for LICENSE_TYPE to the file to contain DeviantArt data.
     Args:
-    - license_type(str): A string representing the type of license.
-    It's a segment of the URL towards the license description. If not provided,
-    it defaults to None, indicating no assumption about the license type.
+    - license_type:
+            A string representing the type of license, and should be a segment
+            of its URL towards the license description. This argument is
+            required; the function signature declares no default value for
+            it.
     """
     data_log = (
         f"{license_type},"
@@ -153,9 +160,8 @@ def record_all_licenses():
     list and writes this data into the DATA_WRITE_FILE, as specified by the
     constant.
     """
-    # Get the list of license types
+    # Get the list of license types and record data for each of them
     license_list = get_license_list()
-    # Record data for each license types
     for license_type in license_list:
         record_license_data(license_type)
 

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 """
-This file is dedicated to obtain a .csv record report for Google Custom Search
-Data.
+This file is dedicated to obtaining a .csv record report for
+Google Custom Search data.
 """
 
 # Standard library
@@ -17,13 +17,16 @@
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 
+# Set up current working directory (CWD) and root_path
 CWD = os.path.dirname(os.path.abspath(__file__))
-dotenv_path = os.path.join(os.path.dirname(CWD), ".env")
+root_path = os.path.dirname(CWD)
+# Load environment variables
+dotenv_path = os.path.join(root_path, ".env")
 load_dotenv(dotenv_path)
 
+
+# Get today's date and build the dated output file paths inside CWD
 today = dt.datetime.today()
-API_KEYS = os.getenv("GOOGLE_API_KEYS").split(",")
-API_KEYS_IND = 0
 DATA_WRITE_FILE = (
     f"{CWD}"
     f"/data_google_custom_search_{today.year}_{today.month}_{today.day}.csv"
@@ -38,18 +41,30 @@
     f"/data_google_custom_search_country_"
     f"{today.year}_{today.month}_{today.day}.csv"
 )
-SEARCH_HALFYEAR_SPAN = 20
+
+# Gets API_KEYS and PSE_KEY from .env file
+API_KEYS = os.getenv("GOOGLE_API_KEYS").split(",")
 PSE_KEY = os.getenv("PSE_KEY")
 
+# Global Variables for API_KEYS indexing and Search Halfyear Span
+API_KEYS_IND = 0
+SEARCH_HALFYEAR_SPAN = 20
+
 
 def get_license_list():
-    """Provides the list of license from 2018's record of Creative Commons.
+    """
+    Provides the list of licenses from 2018's record of Creative Commons.
 
     Returns:
-        np.array: An np array containing all license types that should be
-        searched via Programmable Search Engine.
+    - np.array:
+            An np array containing all license types that should be searched
+            via Programmable Search Engine (PSE).
     """
-    cc_license_data = pd.read_csv(f"{CWD}/legal-tool-paths.txt", header=None)
+    # Read license data from file
+    cc_license_data = pd.read_csv(
+        f"{root_path}/legal-tool-paths.txt", header=None
+    )
+    # Define regex pattern to extract license types
     license_pattern = r"((?:[^/]+/){2}(?:[^/]+)).*"
     license_list = (
         cc_license_data[0]
@@ -61,11 +76,13 @@ def get_license_list():
 
 
 def get_lang_list():
-    """Provides the list of language to find Creative Commons usage data on.
+    """
+    Provides the list of languages to find Creative Commons usage data on.
 
     Returns:
-        pd.DataFrame: A Dataframe whose index is language name and has a column
-        for the corresponding language code.
+    - pd.DataFrame:
+                A Dataframe whose index is language name and has a column for
+                the corresponding language code.
     """
     languages = pd.read_csv(
         f"{CWD}/google_lang.txt", sep=": ", header=None, engine="python"
@@ -90,16 +107,18 @@ def get_lang_list():
 
 
 def get_country_list(select_all=False):
-    """Provides the list of countries to find Creative Commons usage data on.
+    """
+    Provides the list of countries to find Creative Commons usage data on.
 
     Args:
-        select_all:
-            A boolean indicating whether the returned list will have all
-            countries.
+    - select_all:
+                A boolean indicating whether the returned list will have all
+                countries.
 
     Returns:
-        pd.DataFrame: A Dataframe whose index is country name and has a column
-        for the corresponding country code.
+    - pd.DataFrame:
+                A Dataframe whose index is country name and has a column for
+                the corresponding country code.
     """
     countries = pd.read_csv(CWD + "/google_countries.tsv", sep="\t")
     countries["Country"] = countries["Country"].str.replace(",", " ")
@@ -125,30 +144,32 @@ def get_country_list(select_all=False):
 
 
 def get_request_url(license=None, country=None, language=None, time=False):
-    """Provides the API Endpoint URL for specified parameter combinations.
+    """
+    Provides the API Endpoint URL for specified parameter combinations.
 
     Args:
-        license:
+    - license:
             A string representing the type of license, and should be a segment
             of its URL towards the license description. Alternatively, the
             default None value stands for having no assumption about license
             type.
-        country:
+    - country:
             A string representing the country code of country that the search
             results would be originating from. Alternatively, the default None
             value or "all" stands for having no assumption about country of
             origin.
-        language:
+    - language:
             A string representing the language that the search results are
             presented in. Alternatively, the default None value or "all" stands
             for having no assumption about language of document.
-        time:
+    - time:
             A boolean indicating whether this query is related to video time
             occurrence.
 
     Returns:
-        string: A string representing the API Endpoint URL for the query
-        specified by this function's parameters.
+    - string:
+            A string representing the API Endpoint URL for the query specified
+            by this function's parameters.
     """
     try:
         api_key = API_KEYS[API_KEYS_IND]
@@ -177,32 +198,35 @@ def get_request_url(license=None, country=None, language=None, time=False):
 
 
 def get_response_elems(license=None, country=None, language=None, time=False):
-    """Provides the metadata for query of specified parameters
+    """
+    Provides the metadata for the query of the specified parameters.
 
     Args:
-        license:
+    - license:
             A string representing the type of license, and should be a segment
             of its URL towards the license description. Alternatively, the
             default None value stands for having no assumption about license
             type.
-        country:
+    - country:
             A string representing the country code of country that the search
             results would be originating from. Alternatively, the default None
             value or "all" stands for having no assumption about country of
             origin.
-        lang:
+    - language:
             A string representing the language that the search results are
             presented in. Alternatively, the default None value or "all" stands
             for having no assumption about language of document.
-        time:
+    - time:
             A boolean indicating whether this query is related to video time
             occurrence.
 
     Returns:
-        dict: A dictionary mapping metadata to its value provided from the API
-        query of specified parameters.
+    - dict:
+            A dictionary mapping metadata to its value provided from the API
+            query of specified parameters.
     """
     try:
+        # Make a request to the API and handle potential retries
         request_url = get_request_url(license, country, language, time)
         max_retries = Retry(
             total=5,
@@ -221,6 +245,7 @@ def get_response_elems(license=None, country=None, language=None, time=False):
         return search_data_dict
     except Exception as e:
         if isinstance(e, requests.exceptions.HTTPError):
+            # HTTP error (e.g. quota exceeded): switch to next API key
             global API_KEYS_IND
             API_KEYS_IND += 1
             print(
@@ -233,7 +258,7 @@ def get_response_elems(license=None, country=None, language=None, time=False):
 
 
 def set_up_data_file():
-    """Writes the header row to file to contain Google Query data."""
+    """Write the header rows to the files that contain Google Query data."""
     header_title = "LICENSE TYPE,No Priori,"
     selected_countries = get_country_list()
     all_countries = get_country_list(select_all=True)
@@ -257,18 +282,19 @@ def set_up_data_file():
 
 
 def record_license_data(license_type=None, time=False, country=False):
-    """Writes the row for LICENSE_TYPE to file to contain Google Query data.
+    """
+    Writes the row for LICENSE_TYPE to file to contain Google Query data.
 
     Args:
-        license:
+    - license_type:
             A string representing the type of license, and should be a segment
             of its URL towards the license description. Alternatively, the
             default None value stands for having no assumption about license
             type.
-        time:
+    - time:
             A boolean indicating whether this query is related to video time
             occurrence.
-        country:
+    - country:
             A boolean indicating whether this query is related to country
             occurrence.
     """
@@ -317,16 +343,19 @@ def record_license_data(license_type=None, time=False, country=False):
 
 
 def record_all_licenses():
-    """Records the data of all license types findable in the license list and
+    """
+    Records the data of all license types findable in the license list and
     records these data into the DATA_WRITE_FILE and DATA_WRITE_FILE_TIME as
     specified in that constant.
     """
-    license_list = get_license_list()
-    record_license_data(time=False)
+    # Record license data with no assumption about license type
+    record_license_data()
     record_license_data(time=True)
     record_license_data(country=True)
+    # Get the list of license types and record data for each of them
+    license_list = get_license_list()
     for license_type in license_list:
-        record_license_data(license_type, time=False)
+        record_license_data(license_type)
         record_license_data(license_type, time=True)