"""Pack the local project workspace into a gzipped tarball for upload.
We tar the user's CWD with sensible excludes so they don't have to think about
data files / helper modules — the server gets everything it needs to run
``python main.py`` in an isolated working directory.
Exclusion rules (in order):
1. Hard-coded defaults (see ``DEFAULT_EXCLUDES``): ``logger/``, ``export/``,
``__pycache__/``, ``.git/``, ``.venv/``, ``venv/``, ``.mypy_cache/``,
``.pytest_cache/``, ``.idea/``, ``.vscode/``, ``.DS_Store``, ``*.pyc``.
2. Patterns from ``.fennignore`` (one shell-style glob per line, ``#`` comments).
3. User-supplied ``extra_excludes`` (shell-style globs).
A hard size cap on the *uncompressed* total prevents accidental uploads of
multi-gigabyte datasets.
"""
from __future__ import annotations
import fnmatch
import tarfile
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, List, Optional, Sequence
from fenn.remote.exceptions import WorkspaceTooLargeError
# File name looked up at the workspace root to decide whether the server
# should build a virtualenv (see ``detect_venv_spec``).
REQUIREMENTS_FILENAME = "requirements.txt"

# Names and globs that are always skipped when packing; ``.fennignore`` entries
# and caller-supplied excludes are appended after these.
DEFAULT_EXCLUDES: tuple[str, ...] = (
    # Project output directories.
    "logger",
    "export",
    # Interpreter caches, version control and virtual environments.
    "__pycache__",
    ".git",
    ".venv",
    "venv",
    # Tool caches.
    ".mypy_cache",
    ".pytest_cache",
    # Editor / OS metadata.
    ".idea",
    ".vscode",
    ".DS_Store",
    # Compiled bytecode.
    "*.pyc",
)

# Default cap on the summed size of packed files: 100 MiB, measured before
# compression.
DEFAULT_MAX_BYTES = 100 * 1024**2
@dataclass
class WorkspacePack:
    """Describe a packed workspace archive sitting in a temporary file.

    The gzip archive at ``path`` is backed by a ``NamedTemporaryFile`` and is
    not removed automatically: whoever receives this object owns the file and
    should call :meth:`cleanup` from a ``finally`` block once it is no longer
    needed.
    """

    # Location of the ``.tar.gz`` on disk.
    path: Path
    # Summed on-disk size of the packed files, before compression.
    uncompressed_bytes: int
    # Number of files stored in the archive.
    file_count: int
    # POSIX-style path of the entrypoint script, relative to the workspace root.
    script_relpath: str

    def cleanup(self) -> None:
        """Delete the archive; a file that is already gone is not an error."""
        self.path.unlink(missing_ok=True)
def detect_venv_spec(root: Path) -> Optional[dict]:
    """Describe the virtualenv the server should build for ``root``, if any.

    Only a top-level ``requirements.txt`` is recognised at the moment. When it
    is present as a regular file, the returned dict is the payload the CLI
    forwards to the server under ``meta['venv']`` so the remote side can
    create an isolated venv and install dependencies before starting the
    entrypoint.

    Args:
        root: Workspace directory to inspect.

    Returns:
        ``{"enabled": True, "requirements": "requirements.txt"}`` when the
        file exists, otherwise ``None``.
    """
    manifest = root / REQUIREMENTS_FILENAME
    if not manifest.is_file():
        return None
    return {"enabled": True, "requirements": REQUIREMENTS_FILENAME}
def _load_fennignore(root: Path) -> List[str]:
    """Read exclusion patterns from ``root/.fennignore``.

    The file holds one shell-style glob per line. Surrounding whitespace is
    stripped; blank lines and lines starting with ``#`` are skipped.

    Args:
        root: Workspace directory that may contain a ``.fennignore`` file.

    Returns:
        The patterns in file order, or an empty list when there is no regular
        ``.fennignore`` file.
    """
    path = root / ".fennignore"
    # ``is_file`` rather than ``exists``: a directory that happens to be named
    # ``.fennignore`` must not crash ``read_text``.
    if not path.is_file():
        return []
    # ``utf-8-sig`` also accepts plain UTF-8 but drops a leading BOM, which
    # would otherwise glue itself onto the first pattern (or hide a first-line
    # ``#`` comment).
    stripped = (raw.strip() for raw in path.read_text(encoding="utf-8-sig").splitlines())
    return [line for line in stripped if line and not line.startswith("#")]
def _is_excluded(rel: Path, patterns: Sequence[str]) -> bool:
    """Tell whether the workspace-relative path ``rel`` matches any pattern.

    Matching rules, applied per pattern:

    * A trailing ``/`` (``build/``) is ignored, so the pattern behaves like the
      bare name. Previously such patterns could never match.
    * A pattern without ``/`` that equals any path component excludes the path
      (``logger`` matches ``a/logger/b.txt``), like simple ``.gitignore``
      names.
    * Glob patterns (containing ``*``, ``?`` or ``[``) and patterns containing
      ``/`` are matched with :func:`fnmatch.fnmatch` against the full POSIX
      path, the basename, every single component and every ancestor directory
      path. This makes ``*.egg-info`` or ``data/raw`` exclude the files
      *beneath* a matching directory, not only a path that matches exactly.

    Args:
        rel: Path relative to the workspace root.
        patterns: Shell-style globs or bare names.

    Returns:
        ``True`` if at least one pattern matches, else ``False``.
    """
    parts = rel.parts
    # Everything a glob may match: the path itself, its basename, each
    # component, and each ancestor directory ("a", "a/b", ...).
    candidates = {rel.as_posix(), rel.name}
    candidates.update(parts)
    candidates.update("/".join(parts[:depth]) for depth in range(1, len(parts)))

    for raw in patterns:
        pattern = raw.rstrip("/")
        if not pattern:
            continue
        if "/" not in pattern:
            # Literal component match; kept case-sensitive and tried first so
            # names that merely contain "[" still work as plain names.
            if pattern in parts:
                return True
            if not any(ch in pattern for ch in "*?["):
                continue
        if any(fnmatch.fnmatch(candidate, pattern) for candidate in candidates):
            return True
    return False
def pack_workspace(
    root: Path,
    script: Path,
    *,
    extra_includes: Optional[Iterable[Path]] = None,
    extra_excludes: Optional[Iterable[str]] = None,
    max_bytes: int = DEFAULT_MAX_BYTES,
) -> WorkspacePack:
    """Tar+gzip ``root`` into a tempfile.

    Args:
        root: Project directory to pack. Must be an existing directory.
        script: Path (absolute or relative to ``root``) of the entrypoint
            script. Must live underneath ``root``. Returned in
            :attr:`WorkspacePack.script_relpath` as a POSIX relative path so
            the server can locate it in the unpacked workdir. The script is
            always packed, even when an exclude pattern matches it.
        extra_includes: Paths that should be force-included even if they would
            otherwise be excluded. A directory force-includes every file
            beneath it. Paths outside ``root`` (and ``root`` itself) are
            ignored.
        extra_excludes: Additional shell-glob patterns to skip.
        max_bytes: Cap on the uncompressed total. Raises
            :class:`WorkspaceTooLargeError` if exceeded.

    Returns:
        A :class:`WorkspacePack` pointing at the gzipped tarball. The caller
        owns the file and must delete it.

    Raises:
        FileNotFoundError: ``root`` is not a directory or ``script`` is not a
            file.
        ValueError: ``script`` is outside ``root``.
        WorkspaceTooLargeError: the packed files exceed ``max_bytes``.
    """
    root = root.resolve()
    if not root.is_dir():
        raise FileNotFoundError(f"Workspace root not found: {root}")

    # Relative scripts are interpreted relative to the workspace root.
    script = (script if script.is_absolute() else root / script).resolve()
    try:
        script_rel = script.relative_to(root)
    except ValueError as exc:
        raise ValueError(
            f"Script {script} must live inside the workspace root {root}"
        ) from exc
    if not script.is_file():
        raise FileNotFoundError(f"Entrypoint script not found: {script}")

    # Exclusion order: built-in defaults, then .fennignore, then caller extras.
    patterns = list(DEFAULT_EXCLUDES)
    patterns.extend(_load_fennignore(root))
    if extra_excludes:
        patterns.extend(extra_excludes)

    # The entrypoint is seeded into the forced set: an archive without it is
    # useless to the server, whatever the exclude patterns say.
    force_include = {script_rel}
    for include in extra_includes or ():
        try:
            include_rel = include.resolve().relative_to(root)
        except ValueError:
            # Outside the workspace: silently ignored.
            continue
        # Skip the root itself ("."); forcing it would disable every exclude.
        if include_rel.parts:
            force_include.add(include_rel)

    def _is_forced(rel: Path) -> bool:
        """True if ``rel`` or one of its parent directories is force-included."""
        if rel in force_include:
            return True
        return any(parent in force_include for parent in rel.parents)

    tmp = tempfile.NamedTemporaryFile(
        prefix="fenn-workspace-", suffix=".tar.gz", delete=False
    )
    tmp.close()
    tar_path = Path(tmp.name)

    total_bytes = 0
    file_count = 0
    try:
        # If the temp directory lives under ``root`` the walk would otherwise
        # pick up the half-written archive itself.
        tar_resolved = tar_path.resolve()
        with tarfile.open(tar_path, mode="w:gz") as tar:
            # Sorted so the archive member order is deterministic.
            for file_path in sorted(root.rglob("*")):
                if not file_path.is_file() or file_path == tar_resolved:
                    continue
                rel = file_path.relative_to(root)
                if not _is_forced(rel) and _is_excluded(rel, patterns):
                    continue
                total_bytes += file_path.stat().st_size
                if total_bytes > max_bytes:
                    raise WorkspaceTooLargeError(
                        f"Workspace exceeds {max_bytes:,} bytes "
                        f"(uncompressed). Add entries to .fennignore or pass "
                        f"--exclude. Hit the limit at {rel.as_posix()!r}."
                    )
                file_count += 1
                # Store paths POSIX-style (forward slashes) so the server can
                # decompress safely on any OS.
                # NOTE(review): symlinks are stored as links (tarfile default)
                # while the size above is that of the link target — confirm
                # this is what the server expects.
                tar.add(file_path, arcname=rel.as_posix(), recursive=False)
    except BaseException:
        # Never leave a partial archive behind, including on KeyboardInterrupt.
        tar_path.unlink(missing_ok=True)
        raise

    return WorkspacePack(
        path=tar_path,
        uncompressed_bytes=total_bytes,
        file_count=file_count,
        script_relpath=script_rel.as_posix(),
    )