Source code for nsaph_gis.downloader

Utilities to download shapefiles from the US Census website

#  Copyright (c) 2022. Harvard University
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Mikhail Polykovsky
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
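#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.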
#  See the License for the specific language governing permissions and
#  limitations under the License.

import os
import zipfile
from enum import Enum
from typing import Tuple
from urllib import request
import ssl
import certifi

from tqdm import tqdm

class CensusShapeCollection(Enum):
    """
    Identifies which collection of census shape files to use.

    Each member's value is the lower-case name of the collection, so a
    member can be looked up from its string form, e.g.
    ``CensusShapeCollection('tiger')``.
    """

    genz = 'genz'
    tiger = 'tiger'
class GISDownloader:
    """
    Geographic Downloader downloads shape files for given years from
    the US Census website.

    Every method is a class method, so no instance is required.  The
    public entry points are :meth:`download_shapes`,
    :meth:`download_zcta` and :meth:`download_county`; the ``_get_*``
    helpers map a requested year onto the year for which a shape file
    is tabulated.
    """

    # NOTE(review): none of the URL strings below carries a scheme or a
    # host, and several are empty, although they are handed unchanged to
    # ``urllib.request.urlretrieve``.  They look truncated -- confirm
    # the values against the canonical source before relying on them.

    # Template for county shape archives, formatted with ``year``.
    COUNTY_TEMPLATE = '{year}/shp/cb_{year}'
    # Template for ZCTA archives of the "genz" collection.
    ZCTA_GENZ_TEMPLATE = '{year}/shp/cb_{year}'
    # Year -> archive location for ZCTA archives of the "tiger"
    # collection.  Years absent from this table (e.g. 2009, 2011) are
    # resolved to a neighbouring year by ``_get_tiger_zcta_url``.
    ZCTA_TIGER_URLs = {
        2008: '',
        2010: '',
    }
    # The table is completed while the class body executes.  The loop
    # variable ``y`` stays bound as a class attribute afterwards.
    for y in range(2012, 2020):
        ZCTA_TIGER_URLs[y] = f'{y}/ZCTA5/tl_{y}'
    for y in range(2020, 2023):
        ZCTA_TIGER_URLs[y] = f'{y}/ZCTA520/tl_{y}'

    @classmethod
    def download_shapes(cls, source: CensusShapeCollection, year: int,
                        output_dir: str = None,
                        strict: bool = False) -> None:
        """
        Download ZCTA shapes from both collections plus county shapes.

        :param source: accepted for interface compatibility; it is not
            consulted, the "genz" and the "tiger" ZCTA archives are
            both downloaded
        :param year: year for which shapes are requested
        :param output_dir: target directory; the current working
            directory is used when omitted
        :param strict: if true, raise instead of falling back to the
            nearest year that has data
        :raises ValueError: in strict mode, when there is no data for
            exactly ``year``
        """
        cls.download_zcta(CensusShapeCollection.genz, year, output_dir,
                          strict)
        cls.download_zcta(CensusShapeCollection.tiger, year, output_dir,
                          strict)
        cls.download_county(year, output_dir, strict)

    @classmethod
    def download_zcta(cls, source: CensusShapeCollection, year: int,
                      output_dir: str = None,
                      strict: bool = False) -> None:
        """
        Download and unpack the ZCTA shape archive for ``year``.

        :param source: collection to download from; anything other than
            ``CensusShapeCollection.genz`` selects the "tiger" table
        :param year: year for which shapes are requested
        :param output_dir: target directory; the current working
            directory is used when omitted
        :param strict: if true, raise instead of falling back to the
            nearest year that has data
        :raises ValueError: in strict mode, when there is no data for
            exactly ``year``
        """
        if source == CensusShapeCollection.genz:
            zip_url, is_exact = cls._get_genz_zcta_url(year)
        else:
            zip_url, is_exact = cls._get_tiger_zcta_url(year)

        if strict and not is_exact:
            raise ValueError(f'There is no census data for year { year }.')

        cls._download_shape(zip_url, output_dir)

    @classmethod
    def download_county(cls, year: int, output_dir: str = None,
                        strict: bool = False) -> None:
        """
        Download and unpack the county shape archive for ``year``.

        :param year: year for which shapes are requested
        :param output_dir: target directory; the current working
            directory is used when omitted
        :param strict: if true, raise instead of falling back to the
            nearest year that has data
        :raises ValueError: in strict mode, when there is no data for
            exactly ``year``
        """
        county_url, is_exact = cls._get_county_url(year)

        if strict and not is_exact:
            raise ValueError(f'There is no census data for year { year }.')

        cls._download_shape(county_url, output_dir)

    @classmethod
    def _download_shape(cls, url: str, output_dir: str = None) -> None:
        """
        Fetch the zip archive at ``url`` into ``output_dir`` and unpack
        it there.

        The archive is stored under the last path component of ``url``.
        When a file of that name already exists the download is skipped
        and the existing archive is unpacked.

        :param url: location of the zip archive
        :param output_dir: target directory; the current working
            directory is used when omitted
        :raises ValueError: if no file name can be derived from ``url``
        """
        if output_dir is None:
            output_dir = '.'

        # Taking the last component also works for a URL without any
        # '/', which would otherwise fail with a bare IndexError.
        shape_file = url.rsplit('/', 1)[-1]
        if not shape_file:
            raise ValueError(f'Cannot derive a file name from URL {url!r}.')
        dest = os.path.join(output_dir, shape_file)

        if not os.path.exists(dest):
            https_proxy = os.environ.get('HTTPS_PROXY')
            if https_proxy:
                proxy = request.ProxyHandler(
                    {'http': https_proxy, 'https': https_proxy}
                )
                # install_opener() makes this opener the default for
                # every later urllib.request call in the process.
                request.install_opener(request.build_opener(proxy))

            # Download under a temporary name so that an interrupted
            # transfer is never mistaken for a complete archive by the
            # existence check above on the next call.
            partial = dest + '.part'
            with tqdm(desc=f'Downloading {url}') as bar:
                def report(blocknum, bs, size):
                    # urlretrieve reports -1 when the size is unknown.
                    if size > 0:
                        bar.total = size
                    bar.update(bs)

                # NOTE(review): this replaces the process-wide default
                # HTTPS context with an unverified one, i.e. server
                # certificates are not checked for this and any later
                # urllib HTTPS request.  Kept as is to preserve
                # behaviour; consider a verifying context instead.
                ssl._create_default_https_context = \
                    ssl._create_unverified_context
                request.urlretrieve(url, partial, reporthook=report)
            os.replace(partial, dest)

        # NOTE(review): the flattened listing this was restored from
        # does not show whether extraction was nested under the
        # existence check; it is performed unconditionally here --
        # confirm against the canonical source.
        with zipfile.ZipFile(dest, 'r') as zip_ref:
            zip_ref.extractall(output_dir)

    @classmethod
    def _get_county_url(cls, year: int) -> Tuple[str, bool]:
        """
        Method returns url to county shape file for nearest existing
        year data.

        Years after 2020 resolve to 2020; 2011, 2012 and every year
        before 2010 resolve to 2010.

        :param year: requested year
        :return: ``(url, is_exact)`` where ``is_exact`` is false when
            the url belongs to a substitute year
        """
        if year > 2020:
            return cls._get_county_url(2020)[0], False
        if year in (2012, 2011) or year < 2010:
            return cls._get_county_url(2010)[0], False
        # NOTE(review): 2010 and 2013 do not use the template; their
        # literal locations are empty here -- presumably truncated.
        if year == 2010:
            return '', True
        if year == 2013:
            return '', True
        if 2014 <= year <= 2020:
            return cls.COUNTY_TEMPLATE.format(year=year), True

    @classmethod
    def _get_genz_zcta_url(cls, year: int) -> Tuple[str, bool]:
        """
        Method returns url to zip shape file for nearest existing year
        data in the "genz" collection.

        Years after 2020 resolve to 2020; 2011, 2012 and every year
        before 2010 resolve to 2010.

        :param year: requested year
        :return: ``(url, is_exact)`` where ``is_exact`` is false when
            the url belongs to a substitute year
        """
        if year > 2020:
            return cls._get_genz_zcta_url(2020)[0], False
        if year in (2012, 2011) or year < 2010:
            return cls._get_genz_zcta_url(2010)[0], False
        # NOTE(review): 2010, 2013 and 2020 do not use the template;
        # their literal locations are empty here -- presumably
        # truncated.
        if year == 2010:
            return '', True
        if year == 2013:
            return '', True
        if 2014 <= year <= 2019:
            return cls.ZCTA_GENZ_TEMPLATE.format(year=year), True
        if year == 2020:
            return '', True

    @classmethod
    def _get_tiger_zcta_url(cls, year: int) -> Tuple[str, bool]:
        """
        Method returns url to zip shape file for nearest existing year
        data in the "tiger" collection.

        A year present in ``ZCTA_TIGER_URLs`` is an exact match.
        Otherwise the latest tabulated year not after ``year`` is used,
        and if ``year`` precedes every tabulated year, the earliest one.

        :param year: requested year
        :return: ``(url, is_exact)`` where ``is_exact`` is false when
            the url belongs to a substitute year
        """
        if year in cls.ZCTA_TIGER_URLs:
            return cls.ZCTA_TIGER_URLs[year], True

        not_later = [y for y in cls.ZCTA_TIGER_URLs if y <= year]
        nearest = max(not_later) if not_later else min(cls.ZCTA_TIGER_URLs)
        return cls.ZCTA_TIGER_URLs[nearest], False