blob: 45022c8287b338ee04addb28b58c8281bfb02bf1 [file] [log] [blame] [edit]
#!/usr/bin/env python3
"""Region database generator from public documents."""
from __future__ import print_function
import pycountry
# Load existing database.
import regions
def LoadZoneTable():
    """Load the region-code to time-zone mapping from ``zone.tab``.

    Reads the file ``zone.tab`` (published at
    http://www.iana.org/time-zones) from the current working directory.
    Lines starting with ``#`` and blank lines are ignored.

    Returns:
      A dict mapping each region code (first tab-separated column) to the
      list of time zone names (third column) found for it, in file order.

    Raises:
      ValueError: If a data line has fewer than three tab-separated columns.
    """
    zones = {}
    # Explicit encoding keeps parsing independent of the host locale.
    with open('zone.tab', encoding='utf-8') as f:
        for line in f:
            # Skip comments, and blank lines that would otherwise break the
            # three-way unpacking below.
            if line.startswith('#') or not line.strip():
                continue
            # format: code coordinates TZ comments
            code, _, tz = line.split('\t')[0:3]
            # The TZ column may be the last one on the line, so strip the
            # trailing newline.
            zones.setdefault(code, []).append(tz.strip())
    return zones
def LoadCountryTable(tzinfo):
    """Build region records from the geonames ``countryInfo.txt`` table.

    Reads ``countryInfo.txt``
    (http://download.geonames.org/export/dump/countryInfo.txt) from the
    current working directory. Lines starting with ``#`` and blank lines are
    ignored, as are the region codes listed in ``skip_regions``.

    Args:
      tzinfo: Mapping from upper-case region code to a list of time zone
        names, as returned by LoadZoneTable().

    Returns:
      A dict keyed by upper-case region code. Each value is a dict with:
        region_code: the region code in lower case.
        description: column 4 of the table (country name).
        keyboards: a string of the form 'xkb:<region>::<language>'.
        time_zones: tzinfo[code].
        locales: column 15 (languages) split on commas.

    Raises:
      SystemExit: If a region that is not skipped has no entry in tzinfo.
    """
    # http://download.geonames.org/export/dump/countryInfo.txt
    skip_regions = frozenset(
        ['BV', 'HM', 'CS', 'XK', 'AN'] +  # reserved regions
        ['HM', 'AQ'] +  # broken data
        ['CA'])  # We only have ca.variants in region database.
    new_regions = {}
    # Decode explicitly as UTF-8 instead of relying on the host locale.
    # NOTE(review): assumes countryInfo.txt is UTF-8 -- confirm against the
    # downloaded file.
    with open('countryInfo.txt', encoding='utf-8') as f:
        for line in f:
            # A blank line would otherwise be treated as a region code and
            # abort the run below.
            if line.startswith('#') or not line.strip():
                continue
            # format: [0]ISO3166-1a2, [4]Country, [5]Capital, [15]Languages
            data = line.split('\t')
            code = data[0]
            if code in skip_regions:
                continue
            if code not in tzinfo:
                # Raise SystemExit directly; the exit() builtin is only an
                # interactive helper injected by the site module.
                raise SystemExit(
                    'Region code <%s> has no timezone info!' % code)
            languages = data[15]
            # Try to build XKB from the primary subtag of the first listed
            # language; fall back to a US English layout when that subtag is
            # not a two-letter code.
            kbd_region = code.lower()
            lang = languages.partition(',')[0].partition('-')[0]
            if len(lang) != 2:
                lang = 'en'
                kbd_region = 'us'
            # NOTE(review): the `alpha2` keyword and `terminology` attribute
            # depend on the installed pycountry version -- confirm they match
            # the version this tool is run with.
            kbd_lang = pycountry.languages.get(alpha2=lang).terminology
            new_regions[code] = dict(
                region_code=code.lower(),
                description=data[4],
                keyboards='xkb:%s::%s' % (kbd_region, kbd_lang),
                time_zones=tzinfo[code],
                locales=languages.strip(',').split(','),
            )
    return new_regions
def main():
    """Print Region(...) entries for regions missing from the database.

    Loads the time zone and country tables, skips every region whose
    lower-case code is already present in regions.REGIONS, and prints one
    ``Region(...)`` line per remaining region to standard output.
    """
    def flat(v):
        # Collapse a single-element list to its only element so the printed
        # entry shows a plain value instead of a one-item list.
        if isinstance(v, list) and len(v) == 1:
            return v[0]
        return v

    tzinfo = LoadZoneTable()
    for region in LoadCountryTable(tzinfo).values():
        code = region['region_code']
        if code in regions.REGIONS:
            continue
        # Region takes: code, kbd, tz, locale, layout, description, comment,
        # regdomain.
        # Every string field is emitted with %r so the generated line stays
        # a valid Python literal even when a value (e.g. the description)
        # contains a quote or a backslash.
        print('Region(%r, %r, %r, %r, _KML.ANSI, %r),' %
              (code, region['keyboards'], flat(region['time_zones']),
               flat(region['locales']), region['description']))
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()