# fetchEncounters.py

from utilities import *
import time
from datetime import datetime
from selenium.common.exceptions import StaleElementReferenceException
import pandas as pd
import glob

# Column order of the cleaned encounter table returned by clean_encounter_data().
_ENCOUNTER_COLUMNS = [
    "tracking_number", "date_of_incident", "primary_category", "event_type", "event_subtype",
    "event_associated_number", "event_associated_number_unit", "latitude", "longitude", "description",
    "actor1", "assoc_actor_1", "actor2", "assoc_actor_2", "actor3", "assoc_actor_3",
    "name_of_ship1", "imo_number_of_ship1", "type_of_ship1",
    "name_of_ship2", "imo_number_of_ship2", "type_of_ship2",
    "name_of_ship3", "imo_number_of_ship3", "type_of_ship3",
    "flag_states_involved", "region", "nearest_country_or_territory",
    "maritime_region", "maritime_subregion", "location", "location_precision", "location_url",
    "source_name", "source_link_external", "source_name2", "source_link2_external",
    "compiled_from", "compilation_source_link_external",
]

# Display labels for the raw vessel "type" values; any other value is kept as-is.
_VESSEL_TYPE_LABELS = {
    "carrier": "Fish Carrier",
    "fishing": "Fishing Vessel",
}

_ENCOUNTER_SOURCE_NAME = "Global Fishing Watch - Carrier Vessel Events, Encounters"
_ENCOUNTER_SOURCE_LINK = "https://globalfishingwatch.org/data-download/datasets/carriers:v20220124"

# Three-letter flag code -> display name. Codes missing from this table are
# passed through unchanged by _flag_states().
_COUNTRY_CODES = {
    'AFG': 'Afghanistan',
    'ALB': 'Albania',
    'DZA': 'Algeria',
    'AND': 'Andorra',
    'AGO': 'Angola',
    'AIA': 'Anguilla',
    'ATA': 'Antarctica',
    'ATG': 'Antigua and Barbuda',
    'ARG': 'Argentina',
    'ARM': 'Armenia',
    'ABW': 'Aruba',
    'AUS': 'Australia',
    'AUT': 'Austria',
    'AZE': 'Azerbaijan',
    'BHS': 'Bahamas',
    'BHR': 'Bahrain',
    'BGD': 'Bangladesh',
    'BRB': 'Barbados',
    'BLM': 'Saint Barthelemy',
    'LSO': 'Lesotho',
    'BLR': 'Belarus',
    'BEL': 'Belgium',
    'BLZ': 'Belize',
    'BEN': 'Benin',
    'BMU': 'Bermuda',
    'BTN': 'Bhutan',
    'GNB': 'Guinea-Bissau',
    'BOL': 'Bolivia',
    'BES': 'Sint Eustatius',
    'BIH': 'Bosnia and Herzegovina',
    'BWA': 'Botswana',
    'BVT': 'Bouvet Island',
    'BRA': 'Brazil',
    'GBR': 'United Kingdom',
    'VGB': 'British Virgin Islands',
    'BRN': 'Brunei',
    'BGR': 'Bulgaria',
    'BFA': 'Burkina Faso',
    'MMR': 'Burma',
    'BDI': 'Burundi',
    'CPV': 'Cabo Verde',
    'KHM': 'Cambodia',
    'CMR': 'Cameroon',
    'CAN': 'Canada',
    'CYM': 'Cayman Islands',
    'CAF': 'Central African Republic',
    'TCD': 'Chad',
    'CHL': 'Chile',
    'CHN': 'China',
    'COL': 'Colombia',
    'COM': 'Comoros',
    'COD': 'Democratic Republic of the Congo',
    'COG': 'Republic of the Congo',
    'COK': 'Cook Islands',
    'CRI': 'Costa Rica',
    'HRV': 'Croatia',
    'CUB': 'Cuba',
    'CUW': 'Curacao',
    'CYP': 'Cyprus',
    'CZE': 'Czech Republic',
    'DNK': 'Denmark',
    'DJI': 'Djibouti',
    'DMA': 'Dominica',
    'DOM': 'Dominican Republic',
    'NLD': 'Netherlands',
    'ECU': 'Ecuador',
    'EGY': 'Egypt',
    'ARE': 'United Arab Emirates',
    'GNQ': 'Equatorial Guinea',
    'ERI': 'Eritrea',
    'EST': 'Estonia',
    'ETH': 'Ethiopia',
    'FLK': 'Falkland Islands',
    'FRO': 'Faroe Islands',
    'FJI': 'Fiji',
    'PHL': 'Philippines',
    'FIN': 'Finland',
    'FRA': 'France',
    'GUF': 'French Guiana',
    'GAB': 'Gabon',
    'GMB': 'Gambia',
    'GEO': 'Georgia',
    'DEU': 'Germany',
    'GHA': 'Ghana',
    'GRC': 'Greece',
    'GRD': 'Grenada',
    'GLP': 'Guadeloupe',
    'GTM': 'Guatemala',
    'GIN': 'Guinea',
    'GUY': 'Guyana',
    'HTI': 'Haiti',
    'HND': 'Honduras',
    'HKG': 'Hong Kong (China)',
    'HUN': 'Hungary',
    'ISL': 'Iceland',
    'IND': 'India',
    'IDN': 'Indonesia',
    'IRN': 'Iran',
    'IRQ': 'Iraq',
    'IRL': 'Ireland',
    'ISR': 'Israel',
    'ITA': 'Italy',
    'CIV': "Cote d'Ivoire",
    'JAM': 'Jamaica',
    'JPN': 'Japan',
    'JOR': 'Jordan',
    'KAZ': 'Kazakhstan',
    'KEN': 'Kenya',
    'KIR': 'Kiribati',
    'KNA': 'Saint Kitts and Nevis',
    'XKX': 'Kosovo',
    'KWT': 'Kuwait',
    'KGZ': 'Kyrgyzstan',
    'LAO': 'Laos',
    'LVA': 'Latvia',
    'LBN': 'Lebanon',
    'LBR': 'Liberia',
    'LBY': 'Libya',
    'LIE': 'Liechtenstein',
    'LTU': 'Lithuania',
    'LUX': 'Luxembourg',
    'MKD': 'North Macedonia',
    'MYT': 'Mayotte',
    'MDG': 'Madagascar',
    'MWI': 'Malawi',
    'MYS': 'Malaysia',
    'MDV': 'Maldives',
    'MLI': 'Mali',
    'MLT': 'Malta',
    'IMN': 'Isle of Man',
    'MHL': 'Marshall Islands',
    'MTQ': 'Martinique',
    'MRT': 'Mauritania',
    'MUS': 'Mauritius',
    'MEX': 'Mexico',
    'FSM': 'Micronesia',
    'MDA': 'Moldova',
    'MCO': 'Monaco',
    'MNG': 'Mongolia',
    'MNE': 'Montenegro',
    'MSR': 'Montserrat',
    'MAR': 'Morocco',
    'MOZ': 'Mozambique',
    'NAM': 'Namibia',
    'NRU': 'Nauru',
    'NPL': 'Nepal',
    'NCL': 'New Caledonia',
    'NZL': 'New Zealand',
    'VUT': 'Vanuatu',
    'NIC': 'Nicaragua',
    'NGA': 'Nigeria',
    'NER': 'Niger',
    'PRK': 'North Korea',
    'NOR': 'Norway',
    'OMN': 'Oman',
    'PAK': 'Pakistan',
    'PLW': 'Palau',
    'N/A': 'Palestinian Territories',
    'PAN': 'Panama',
    'PNG': 'Papua New Guinea',
    'PRY': 'Paraguay',
    'PER': 'Peru',
    'POL': 'Poland',
    'PRT': 'Portugal',
    'QAT': 'Qatar',
    'REU': 'Reunion',
    'ROU': 'Romania',
    'RUS': 'Russia',
    'RWA': 'Rwanda',
    'LCA': 'Saint Lucia',
    'VCT': 'Saint Vincent and the Grenadines',
    'MAF': 'Saint Martin',
    'SPM': 'Saint Pierre and Miquelon',
    'SLV': 'El Salvador',
    'SMR': 'San Marino',
    'WSM': 'Samoa',
    'STP': 'Sao Tome and Principe',
    'SAU': 'Saudi Arabia',
    'SEN': 'Senegal',
    'SRB': 'Serbia',
    'SYC': 'Seychelles',
    'SLE': 'Sierra Leone',
    'SGP': 'Singapore',
    'SXM': 'Sint Maarten',
    'SVK': 'Slovakia',
    'SVN': 'Slovenia',
    'SLB': 'Solomon Islands',
    'SOM': 'Somalia',
    'ZAF': 'South Africa',
    'SGS': 'South Georgia and South Sandwich Islands',
    'KOR': 'South Korea',
    'SSD': 'South Sudan',
    'ESP': 'Spain',
    'LKA': 'Sri Lanka',
    'SDN': 'Sudan',
    'SUR': 'Suriname',
    'SWZ': 'Eswatini',
    'SWE': 'Sweden',
    'CHE': 'Switzerland',
    'SYR': 'Syria',
    'TWN': 'Taiwan',
    'TJK': 'Tajikistan',
    'TZA': 'Tanzania',
    'THA': 'Thailand',
    'TLS': 'Timor-Leste',
    'TGO': 'Togo',
    'TON': 'Tonga',
    'TTO': 'Trinidad and Tobago',
    'TUN': 'Tunisia',
    'TUR': 'Turkey',
    'TKM': 'Turkmenistan',
    'TCA': 'Turks and Caicos Islands',
    'TUV': 'Tuvalu',
    'UGA': 'Uganda',
    'UKR': 'Ukraine',
    'URY': 'Uruguay',
    'UZB': 'Uzbekistan',
    'VAT': 'Holy See',
    'VEN': 'Venezuela',
    'VNM': 'Vietnam',
    'YEM': 'Yemen',
    'ZMB': 'Zambia',
    'ZWE': 'Zimbabwe',
    'CPT': 'Clipperton Island',
    'GBZ': 'Gibraltar',
}


def _encounter_hours(event_start, event_end):
    """Return the hours between two 'YYYY-MM-DD HH:MM:SS UTC' stamps, rounded to 2 places."""
    stamp_format = "%Y-%m-%d %H:%M:%S"
    start = datetime.strptime(event_start.replace(" UTC", ""), stamp_format)
    end = datetime.strptime(event_end.replace(" UTC", ""), stamp_format)
    return round((end - start).total_seconds() / 3600, 2)


def _vessel_type_label(raw_type):
    """Map a raw vessel type to its display label, leaving unknown types untouched."""
    return _VESSEL_TYPE_LABELS.get(raw_type, raw_type)


def _flag_states(vessels):
    """Return the distinct flags of the first two vessels as comma-separated display names."""
    codes = [vessels[0]["flag"]]
    if vessels[1]["flag"] not in codes:
        codes.append(vessels[1]["flag"])
    return ", ".join(_COUNTRY_CODES.get(code.strip(), code) for code in codes)


def _port_text(vessel):
    """Return the 'originating from ..., and en route to ...' fragment for one vessel."""
    origin = vessel["origin_port"]
    destination = vessel["destination_port"]
    return (
        f"originating from {origin['label']}, {origin['iso']}, "
        f"and en route to {destination['label']}, {destination['iso']}"
    )


def _build_encounter_row(row, event_info, vessels):
    """Translate one raw encounter record into a dict keyed by the output column names.

    Args:
        row: one row of the raw encounters table; ``event_start``, ``event_end``,
            ``lat_min`` and ``lon_min`` are read from it.
        event_info: the row's ``event_info`` JSON, already parsed to a dict.
        vessels: the row's ``event_vessels`` JSON, already parsed; the first two
            entries are used and each must provide ``name``, ``type``, ``flag``,
            ``origin_port`` and ``destination_port``.
    """
    first, second = vessels[0], vessels[1]
    type_of_ship1 = _vessel_type_label(first["type"])
    type_of_ship2 = _vessel_type_label(second["type"])

    # The description quotes the raw flag codes, not the display names.
    description = (
        f"Global Fishing Watch reported an encounter between the {first['flag']}-flagged "
        f"{type_of_ship1}: {first['name']}, {_port_text(first)}. "
        f"This encounter also involved the {second['flag']}-flagged "
        f"{type_of_ship2} {second['name']}, {_port_text(second)}"
    )

    return {
        "tracking_number": "",
        "date_of_incident": row["event_start"],
        "primary_category": "Fishing",
        "event_type": "Encounter",
        # A missing authorization status is reported as "Unknown".
        "event_subtype": event_info.get("authorization_status", "unknown").capitalize(),
        "event_associated_number": _encounter_hours(row["event_start"], row["event_end"]),
        "event_associated_number_unit": "Encounter Length (Hours)",
        # NOTE(review): the minimum corner of the bounding box is used here,
        # not the lat_mean/lon_mean columns -- confirm this is intended.
        "latitude": row["lat_min"],
        "longitude": row["lon_min"],
        "description": description,
        "actor1": "Fishermen",
        "assoc_actor_1": "",
        "actor2": "Fishermen",
        "assoc_actor_2": "",
        "actor3": "",
        "assoc_actor_3": "",
        "name_of_ship1": first["name"],
        "imo_number_of_ship1": "",
        "type_of_ship1": type_of_ship1,
        "name_of_ship2": second["name"],
        "imo_number_of_ship2": "",
        "type_of_ship2": type_of_ship2,
        "name_of_ship3": "",
        "imo_number_of_ship3": "",
        "type_of_ship3": "",
        "flag_states_involved": _flag_states(vessels),
        "region": "",
        "nearest_country_or_territory": "",
        "maritime_region": "",
        "maritime_subregion": "",
        # The location column is deliberately emitted as a single blank.
        "location": " ",
        "location_precision": "Point",
        "location_url": "",
        "source_name": _ENCOUNTER_SOURCE_NAME,
        "source_link_external": _ENCOUNTER_SOURCE_LINK,
        "source_name2": "",
        "source_link2_external": "",
        "compiled_from": _ENCOUNTER_SOURCE_NAME,
        "compilation_source_link_external": _ENCOUNTER_SOURCE_LINK,
    }


def clean_encounter_data():
    """Merge every ``*encounters*.csv`` in the working directory into the cleaned schema.

    Each input row must provide ``event_start``, ``event_end``, ``lat_min``,
    ``lon_min`` and the JSON-encoded ``event_info`` and ``event_vessels``
    columns; ``event_vessels`` must list at least two vessels.

    Returns:
        pandas.DataFrame: one row per encounter with the columns listed in
        ``_ENCOUNTER_COLUMNS``. ``date_of_incident`` is formatted as
        ``YYYY-MM-DD``. The frame is empty (columns only) when no CSV file
        matches or the files contain no rows.

    Raises:
        KeyError, IndexError, TypeError, ValueError: if a row lacks one of the
            required fields or holds malformed JSON or timestamps.
    """
    # json is not imported explicitly at the top of this file; importing it
    # here keeps the function from depending on the star import providing it.
    import json

    csv_files = glob.glob("*encounters*.csv")
    if not csv_files:
        # pd.concat raises ValueError on an empty list; return an empty table.
        return pd.DataFrame(columns=_ENCOUNTER_COLUMNS)

    raw = pd.concat([pd.read_csv(path) for path in csv_files], ignore_index=True)

    rows = [
        _build_encounter_row(
            record,
            json.loads(record["event_info"]),
            json.loads(record["event_vessels"]),
        )
        for _, record in raw.iterrows()
    ]

    cleaned = pd.DataFrame(rows, columns=_ENCOUNTER_COLUMNS)
    if cleaned.empty:
        return cleaned

    # Normalise the event start timestamp to a plain calendar date string.
    cleaned["date_of_incident"] = (
        pd.to_datetime(cleaned["date_of_incident"]).dt.strftime("%Y-%m-%d")
    )
    return cleaned

def fetch_Encounters(latest_date):
    """Download new encounter CSVs from Global Fishing Watch and return them cleaned.

    Logs in to the carriers dataset page, clicks the download button of every
    table row whose name cell contains "encounters" and whose date (last cell,
    ``MM/DD/YYYY``) is later than ``latest_date``, waits for each file to
    appear, then hands over to ``clean_encounter_data()``.

    Args:
        latest_date: cut-off compared with ``>`` against the naive datetime
            parsed from each row -- presumably a naive ``datetime``; verify
            against the caller.

    Returns:
        Whatever ``clean_encounter_data()`` returns for the CSV files present
        in the working directory after the downloads.

    Raises:
        ValueError: if a matching row's date cell is not in ``MM/DD/YYYY`` form.
    """
    download_pattern = "*encounters*.csv"
    timeout = 100  # seconds to wait for a single CSV to appear

    driver, wait = get_driver()
    try:
        driver.get("https://globalfishingwatch.org/data-download/datasets/carriers:v20220124")

        credentials = get_credentials("Gloabl-Fishing-Watch")
        username = credentials['username']
        password = credentials['password']

        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "input#email"))).send_keys(username)
        driver.find_element(By.CSS_SELECTOR, "input#password").send_keys(password)
        driver.find_element(By.CSS_SELECTOR, "input.btn").click()

        # Wait for the table and its cells to load.
        wait.until(EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, 'div[role="table"] div[role="row"] div[role="cell"]')))

        rows = driver.find_elements(By.CSS_SELECTOR, 'div[role="table"] div[role="row"]')
        for row in rows[1:]:  # the first row is skipped (presumably the header)
            try:
                cells = row.find_elements(By.CSS_SELECTOR, 'div[role="cell"]')
                if len(cells) < 2 or "encounters" not in cells[1].text:
                    continue

                row_date = datetime.strptime(cells[-1].text.strip(), "%m/%d/%Y")
                if row_date <= latest_date:
                    continue

                # Count the files already present so the wait below detects
                # the *new* file, regardless of leftovers from earlier runs
                # or of an earlier download that timed out.
                files_before = len(glob.glob(download_pattern))
                cells[1].find_element(By.CSS_SELECTOR, 'button').click()
            except StaleElementReferenceException:
                # Best effort: a row that went stale is skipped.
                continue

            # NOTE(review): assumes the browser saves into the current working
            # directory -- confirm against get_driver()'s configuration.
            start_time = time.time()
            while len(glob.glob(download_pattern)) <= files_before:
                if time.time() - start_time > timeout:
                    print("Error: CSV download timed out. Skipping this row.")
                    break
                time.sleep(5)
    finally:
        # Always release the browser, even when login or scraping fails.
        driver.quit()

    return clean_encounter_data()