from enum import Enum

import hashlib
import pathlib
import tarfile
import zipfile
import urllib.request

from SCons.Script import *
class ArchiveType(Enum):
    """Archive formats this module knows how to download and extract."""

    TAR_GZ = 0  # gzip-compressed tarball (.tar.gz)
    ZIP = 1     # zip archive (.zip)
def _detect_archive_type(url: str) -> ArchiveType:
    """Infer the archive format from the URL's file extension.

    Raises:
        Exception: if the URL ends in neither '.tar.gz' nor '.zip'.
    """
    lowered = url.lower()
    if lowered.endswith('.tar.gz'):
        return ArchiveType.TAR_GZ
    if lowered.endswith('.zip'):
        return ArchiveType.ZIP
    raise Exception('could not detect archive type from URL')
def _archive_type_ext(archive_type: ArchiveType) -> str:
    """Return the file extension (without a leading dot) for *archive_type*."""
    extensions = {
        ArchiveType.TAR_GZ: 'tar.gz',
        ArchiveType.ZIP: 'zip',
    }
    ext = extensions.get(archive_type)
    if ext is None:
        raise Exception('invalid archive type')
    return ext
def _download_file(url: str, path: pathlib.Path) -> None:
|
|
if path.exists():
|
|
return
|
|
dl_path = path.with_suffix(f'{path.suffix}.tmp')
|
|
if dl_path.exists():
|
|
dl_path.unlink()
|
|
print(f'Downloading {url} to {dl_path}...')
|
|
urllib.request.urlretrieve(url, dl_path)
|
|
dl_path.rename(path)
|
|
|
|
def _extract_file(path: pathlib.Path, output_dir: str, archive_type: ArchiveType, skip_folders: int) -> None:
    """Extract the archive at *path* into *output_dir*.

    Args:
        path: location of the downloaded archive on disk.
        output_dir: directory to extract into (path-like accepted).
        archive_type: which unpacker to use.
        skip_folders: number of leading path components to strip from every
            member name (like ``tar --strip-components``); members entirely
            consumed by the strip are dropped.

    Raises:
        Exception: if *archive_type* is not a known ``ArchiveType``.
    """
    if archive_type == ArchiveType.TAR_GZ:
        # 'with' ensures the archive handle is closed even if extraction fails
        # (the original leaked it on error).
        with tarfile.open(str(path)) as tar:
            if skip_folders != 0:
                # NOTE(review): installing a custom extraction_filter replaces
                # the default safety filter — assumes the archive is trusted.
                def strip_components(member: tarfile.TarInfo, dest: str):
                    name_parts = member.name.split('/')
                    if len(name_parts) <= skip_folders:
                        return None  # member lies entirely within the stripped prefix
                    return member.replace(name='/'.join(name_parts[skip_folders:]))
                tar.extraction_filter = strip_components
            tar.extractall(output_dir)
    elif archive_type == ArchiveType.ZIP:
        # BUG FIX: the original called zipfile.open(), which does not exist at
        # module level; zipfile.ZipFile is the correct entry point.
        with zipfile.ZipFile(str(path)) as archive:
            if skip_folders == 0:
                archive.extractall(output_dir)
            else:
                # zipfile has no extraction filter hook, so strip the leading
                # components manually (the original ZIP branch ignored
                # skip_folders — and crashed before reaching it anyway).
                out_root = pathlib.Path(output_dir)
                for info in archive.infolist():
                    name_parts = [p for p in info.filename.split('/') if p]
                    if len(name_parts) <= skip_folders:
                        continue
                    target = out_root.joinpath(*name_parts[skip_folders:])
                    if info.is_dir():
                        target.mkdir(parents=True, exist_ok=True)
                    else:
                        target.parent.mkdir(parents=True, exist_ok=True)
                        with archive.open(info) as member_file:
                            target.write_bytes(member_file.read())
    else:
        raise Exception('invalid archive type')
def cook(env: Environment, repo_name: str, url: str, skip_folders: int = 0) -> dict:
    """Download and extract an archive dependency, caching both steps.

    The archive is cached under ``env['DOWNLOAD_DIR']`` keyed by a short hash
    of the URL, and extracted under ``env['CLONE_DIR']/download/<repo_name>``.
    A stamp file marks a completed extraction so repeated cooks are no-ops.

    Args:
        env: SCons environment providing 'DOWNLOAD_DIR' and 'CLONE_DIR'.
        repo_name: directory name for the extracted tree.
        url: archive URL; must end in '.tar.gz' or '.zip'.
        skip_folders: leading path components to strip from archive members.

    Returns:
        dict with key 'extracted_root': the extraction directory as a string.
    """
    archive_type = _detect_archive_type(url)
    ext = _archive_type_ext(archive_type)
    # shake_128 with 6 bytes gives a short, stable cache key for the URL.
    digest = hashlib.shake_128(url.encode("utf-8")).hexdigest(6)
    path = pathlib.Path(env['DOWNLOAD_DIR'], f'{digest}.{ext}')
    output_dir = pathlib.Path(env['CLONE_DIR'], 'download', repo_name)
    stamp_file = pathlib.Path(output_dir, '.spp_extracted')

    if not stamp_file.exists():
        # Robustness fix: guarantee both directories exist — urlretrieve fails
        # if DOWNLOAD_DIR is missing, and stamp_file.touch() fails if the
        # archive created nothing under output_dir (e.g. all members filtered).
        path.parent.mkdir(parents=True, exist_ok=True)
        output_dir.mkdir(parents=True, exist_ok=True)
        _download_file(url, path)
        _extract_file(path, output_dir, archive_type, skip_folders)
        stamp_file.touch()

    return {
        'extracted_root': str(output_dir)
    }