from enum import Enum

import hashlib
import pathlib
import tarfile
import zipfile
import urllib.request

from SCons.Script import *
class ArchiveType(Enum):
    """Archive formats this module knows how to download and extract."""

    TAR_GZ = 0  # gzip-compressed tarball (.tar.gz)
    ZIP = 1     # zip archive (.zip)
def _detect_archive_type(url: str) -> ArchiveType:
    """Infer the archive format from the URL's file extension.

    Raises:
        Exception: if the URL ends in neither '.tar.gz' nor '.zip'.
    """
    lowered = url.lower()
    if lowered.endswith('.tar.gz'):
        return ArchiveType.TAR_GZ
    if lowered.endswith('.zip'):
        return ArchiveType.ZIP
    raise Exception('could not detect archive type from URL')
def _archive_type_ext(archive_type: ArchiveType) -> str:
    """Return the file extension (without a leading dot) for *archive_type*."""
    extensions = {
        ArchiveType.TAR_GZ: 'tar.gz',
        ArchiveType.ZIP: 'zip',
    }
    ext = extensions.get(archive_type)
    if ext is None:
        raise Exception('invalid archive type')
    return ext
def _download_file(url: str, path: pathlib.Path) -> None:
|
|
if path.exists():
|
|
return
|
|
dl_path = path.with_suffix(f'{path.suffix}.tmp')
|
|
if dl_path.exists():
|
|
dl_path.unlink()
|
|
print(f'Downloading {url} to {dl_path}...')
|
|
urllib.request.urlretrieve(url, dl_path)
|
|
dl_path.rename(path)
|
|
|
|
def _extract_file(path: pathlib.Path, output_dir: str, archive_type: ArchiveType, skip_folders: int) -> None:
    """Extract the archive at *path* into *output_dir*.

    Args:
        path: location of the downloaded archive on disk.
        output_dir: directory to extract into (path-like accepted).
        archive_type: which unpacker to use.
        skip_folders: number of leading path components to strip from every
            member name (like ``tar --strip-components``); members entirely
            consumed by the strip are dropped.

    Raises:
        Exception: if *archive_type* is not a known ``ArchiveType``.
    """
    if archive_type == ArchiveType.TAR_GZ:
        # 'with' ensures the archive handle is closed even if extraction fails
        # (the original leaked it on error).
        with tarfile.open(str(path)) as tar:
            if skip_folders != 0:
                # NOTE(review): installing a custom extraction_filter replaces
                # the default safety filter — assumes the archive is trusted.
                def strip_components(member: tarfile.TarInfo, dest: str):
                    name_parts = member.name.split('/')
                    if len(name_parts) <= skip_folders:
                        return None  # member lies entirely within the stripped prefix
                    return member.replace(name='/'.join(name_parts[skip_folders:]))
                tar.extraction_filter = strip_components
            tar.extractall(output_dir)
    elif archive_type == ArchiveType.ZIP:
        # BUG FIX: the original called zipfile.open(), which does not exist at
        # module level; zipfile.ZipFile is the correct entry point.
        with zipfile.ZipFile(str(path)) as archive:
            if skip_folders == 0:
                archive.extractall(output_dir)
            else:
                # zipfile has no extraction filter hook, so strip the leading
                # components manually (the original ZIP branch ignored
                # skip_folders — and crashed before reaching it anyway).
                out_root = pathlib.Path(output_dir)
                for info in archive.infolist():
                    name_parts = [p for p in info.filename.split('/') if p]
                    if len(name_parts) <= skip_folders:
                        continue
                    target = out_root.joinpath(*name_parts[skip_folders:])
                    if info.is_dir():
                        target.mkdir(parents=True, exist_ok=True)
                    else:
                        target.parent.mkdir(parents=True, exist_ok=True)
                        with archive.open(info) as member_file:
                            target.write_bytes(member_file.read())
    else:
        raise Exception('invalid archive type')
def cook(env: Environment, repo_name: str, url: str, skip_folders: int = 0) -> dict:
    """Download and extract an archive dependency, caching both steps.

    The archive is cached under ``env['DOWNLOAD_DIR']`` keyed by a short hash
    of the URL, and extracted under ``env['CLONE_DIR']/download/<repo_name>``.
    A stamp file marks a completed extraction so repeated cooks are no-ops.

    Args:
        env: SCons environment providing 'DOWNLOAD_DIR' and 'CLONE_DIR'.
        repo_name: directory name for the extracted tree.
        url: archive URL; must end in '.tar.gz' or '.zip'.
        skip_folders: leading path components to strip from archive members.

    Returns:
        dict with key 'extracted_root': the extraction directory as a string.
    """
    archive_type = _detect_archive_type(url)
    ext = _archive_type_ext(archive_type)
    # shake_128 with 6 bytes gives a short, stable cache key for the URL.
    digest = hashlib.shake_128(url.encode("utf-8")).hexdigest(6)
    path = pathlib.Path(env['DOWNLOAD_DIR'], f'{digest}.{ext}')
    output_dir = pathlib.Path(env['CLONE_DIR'], 'download', repo_name)
    stamp_file = pathlib.Path(output_dir, '.spp_extracted')

    if not stamp_file.exists():
        # Robustness fix: guarantee both directories exist — urlretrieve fails
        # if DOWNLOAD_DIR is missing, and stamp_file.touch() fails if the
        # archive created nothing under output_dir (e.g. all members filtered).
        path.parent.mkdir(parents=True, exist_ok=True)
        output_dir.mkdir(parents=True, exist_ok=True)
        _download_file(url, path)
        _extract_file(path, output_dir, archive_type, skip_folders)
        stamp_file.touch()

    return {
        'extracted_root': str(output_dir)
    }