-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvertGeoNames.py
48 lines (37 loc) · 2.09 KB
/
convertGeoNames.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Convert GeoNames files to the desired output format.
# Input file columns (cities/countries): geonameid, name, asciiname, alternatenames, lat, long, feature class, feature code, country code, cc2, admin1 code, admin2 code, admin3 code, admin4 code, population, elevation, dem, timezone, modification date
# Output file columns (cities/countries): geonameid, asciiname, lat, long, country code
# Output file columns (country info): geonameid, Country, ISO
import pandas as pd
# --- Input files -----------------------------------------------------------
cityFiles = [
    'cities1000.txt',
    'cities5000.txt',
    'cities15000.txt',
]
#countryFile = 'allCountries.txt'
countryInfoFile = 'Country_Information.txt'

# Tracks whether the next city file is the first one written; only the
# commented-out city loop further down reads or updates it.
firstCity = True

# --- Output files ----------------------------------------------------------
cityOutput = 'cities.csv'
countryOutput = 'countries.csv'
countryInfoOutput = 'countryInfo.csv'

# Labels assigned, in order, to the columns of the city/country input files.
# NOTE(review): 'timeone' looks like a typo for 'timezone'; it is only a
# label here and is not one of the kept columns.
columnNames = [
    'geonameid',
    'name',
    'asciiname',
    'alternatenames',
    'lat',
    'long',
    'feature class',
    'feature code',
    'country code',
    'cc2',
    'admin1 code',
    'admin2 code',
    'admin3 code',
    'admin4 code',
    'population',
    'elevation',
    'dem',
    'timeone',
    'modification date',
]

# Subset of columnNames written to the city/country output files.
keepColumns = [
    'geonameid',
    'asciiname',
    'lat',
    'long',
    'country code',
]

# Labels assigned, in order, to the columns of the country-info input file.
columnNamesCountryInfo = [
    'ISO',
    'ISO3',
    'ISO-Numeric',
    'fips',
    'Country',
    'Capital',
    'Area(in sq km)',
    'Population',
    'Continent',
    'tld',
    'CurrencyCode',
    'CurrencyName',
    'Phone',
    'Postal Code Format',
    'Postal Code Regex',
    'Languages',
    'geonameid',
    'neighbours',
    'EquivalentFipsCode',
]

# Subset of columnNamesCountryInfo written to the country-info output file.
keepColumnsCountryInfo = [
    'geonameid',
    'Country',
    'ISO',
]
def readFile(file, columnNames, keepColumns):
    """Load a headerless, tab-separated file and return only selected columns.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        columnNames: Labels assigned, in order, to the columns of the file.
        keepColumns: Labels to retain; the result's columns follow this order.

    Returns:
        A DataFrame containing just ``keepColumns``.

    Raises:
        KeyError: If an entry of ``keepColumns`` is not in ``columnNames``.
    """
    import csv  # local import: only needed for the quoting constant below

    frame = pd.read_csv(
        file,
        sep='\t',
        header=None,
        names=columnNames,
        # Only an empty field counts as missing. With pandas' default NA
        # list the literal text "NA" (e.g. Namibia's ISO / country code)
        # would be silently converted to NaN.
        keep_default_na=False,
        na_values=[''],
        # The input is plain tab-delimited text with no quoting convention,
        # so a '"' inside a field is data and must not start a quoted field.
        quoting=csv.QUOTE_NONE,
    )
    return frame[keepColumns]
# Disabled: city and all-countries conversion, kept for reference.
#for city in cityFiles:
#    f=readFile(city, columnNames, keepColumns)
#    if firstCity:
#        f.to_csv(cityOutput, index=False, sep='\t')
#        firstCity = False
#    else:
#        f.to_csv(cityOutput, mode='a', header=False, index=False, sep='\t')
#    print (city + ' processed.')
#print (cityOutput + ' is complete.')
#print ('processing ' + countryFile)
#f = readFile(countryFile, columnNames, keepColumns)
#f.to_csv(countryOutput, index=False, sep='\t')
#print (countryOutput + ' is complete')
# Convert the country-information table: keep the selected columns and write
# them tab-separated, with a header row and without the index column.
print('processing {}'.format(countryInfoFile))
f = readFile(
    countryInfoFile,
    columnNamesCountryInfo,
    keepColumnsCountryInfo,
)
f.to_csv(countryInfoOutput, sep='\t', index=False)
print('{} is complete'.format(countryInfoOutput))