# Source code for vistir.path

# -*- coding=utf-8 -*-
from __future__ import absolute_import, print_function, unicode_literals

import atexit
import errno
import functools
import locale
import os
import posixpath
import shutil
import stat
import sys
import typing
import time
import unicodedata
import warnings

from pathlib import Path
from tempfile import NamedTemporaryFile, TemporaryDirectory
from typing import Optional, Callable
from urllib import parse as urllib_parse
from urllib import request as urllib_request

from urllib.parse import quote

# Imported for static type checkers only; in this file these names appear
# solely inside ``# type:`` comments, so they are not needed at runtime.
if typing.TYPE_CHECKING:
    from types import TracebackType
    from typing import (
        Any,
        AnyStr,
        ByteString,
        Generator,
        Iterator,
        List,
        Text,
        Tuple,
        Type,
        Union,
    )

    # Aliases referenced by the type comments of the functions below.
    TPath = os.PathLike
    TFunc = Callable[..., Any]


# Names exported by a star-import of this module.
__all__ = [
    "check_for_unc_path",
    "get_converted_relative_path",
    "handle_remove_readonly",
    "normalize_path",
    "is_in_path",
    "is_file_url",
    "is_readonly_path",
    "is_valid_url",
    "mkdir_p",
    "ensure_mkdir_p",
    "create_tracked_tempdir",
    "create_tracked_tempfile",
    "path_to_url",
    "rmtree",
    "safe_expandvars",
    "set_write_bit",
    "url_to_path",
    "walk_up",
]


# On Windows only: install a process-wide filter that hides the
# DeprecationWarning whose message starts with
# "The Windows bytes API has been deprecated".
if os.name == "nt":
    warnings.filterwarnings(
        "ignore",
        category=DeprecationWarning,
        message="The Windows bytes API has been deprecated.*",
    )


def unicode_path(path):
    # type: (TPath) -> Text
    """Return *path* unchanged.

    Paths are expected to already be represented as unicode text here, so no
    conversion is performed.

    :param path: The path to pass through
    :return: The very same object that was passed in
    """
    return path


def native_path(path):
    # type: (TPath) -> str
    """Return the ``str()`` representation of *path*.

    :param path: A string or path-like object
    :return: ``str(path)``
    :rtype: str
    """
    as_text = str(path)
    return as_text


# Borrowed from Django, with thanks:
# https://github.com/django/django/blob/fc6b90b/django/utils/_os.py
if os.name != "nt":

    def abspathu(path):
        # type: (TPath) -> Text
        """Return the absolute, normalized form of *path*.

        A relative *path* is joined onto ``os.getcwd()`` before being passed
        through ``os.path.normpath``; an absolute one is only normalized.
        """
        if os.path.isabs(path):
            absolute = path
        else:
            absolute = os.path.join(os.getcwd(), path)
        return os.path.normpath(absolute)


else:
    # On Windows the stock implementation is used as-is.
    abspathu = os.path.abspath


def normalize_path(path):
    # type: (TPath) -> Text
    """Return a case-normalized absolute variable-expanded path.

    The input is converted with ``str()``, ``~`` and environment variables
    are expanded, the result is made absolute, and finally it is passed
    through ``os.path.normcase`` and ``os.path.normpath``.

    :param str path: The non-normalized path
    :return: A normalized, expanded, case-normalized path
    :rtype: str
    """
    expanded = os.path.expandvars(os.path.expanduser(str(path)))
    path = os.path.abspath(expanded)
    if os.name == "nt" and os.path.exists(path):
        # Project-local helper, imported lazily because it is Windows-only.
        # NOTE(review): presumably expands 8.3 short names -- confirm in
        # ``_winconsole``.
        from ._winconsole import get_long_path

        path = get_long_path(path)

    return os.path.normpath(os.path.normcase(path))


def is_in_path(path, parent):
    # type: (TPath, TPath) -> bool
    """Determine if the provided full path is in the given parent root.

    Both arguments are normalized with :func:`normalize_path` first. The
    comparison is made on whole path components, so ``/foo/bar2`` is *not*
    considered to be inside ``/foo/bar``; a path is considered to be inside
    itself.

    :param str path: The full path to check the location of.
    :param str parent: The parent path to check for membership in
    :return: Whether the full path is a member of the provided parent.
    :rtype: bool
    """
    child = normalize_path(path)
    root = normalize_path(parent)
    if child == root:
        return True
    # Strip any trailing separator before re-adding one so that a filesystem
    # root ("/" or "c:\\") does not end up with a doubled separator.
    return child.startswith(root.rstrip(os.sep) + os.sep)


def normalize_drive(path):
    # type: (TPath) -> Text
    """Normalize drive in path so they stay consistent.

    This currently only affects local drives on Windows, which can be
    identified with either upper or lower cased drive names. The case is
    always converted to uppercase because it seems to be preferred.

    Off Windows, or when *path* is neither a ``str`` nor an object with a
    ``__fspath__`` attribute, *path* is returned untouched.

    :param path: The path whose drive letter should be normalized
    :return: The path with an upper-cased local drive letter
    """
    is_pathish = isinstance(path, str) or getattr(path, "__fspath__", None)
    if os.name != "nt" or not is_pathish:
        return path  # type: ignore

    drive, tail = os.path.splitdrive(path)
    # Only match (lower cased) local drives (e.g. 'c:'), not UNC mounts.
    if drive.islower() and len(drive) == 2 and drive[1] == ":":
        return "{}{}".format(drive.upper(), tail)

    # Imported only on the one branch that needs it, so the early returns
    # above do not pay for (or depend on) the project helper.
    from .misc import to_text

    return to_text(path, encoding="utf-8")


def path_to_url(path):
    # type: (TPath) -> Text
    """Convert the supplied local path to a file uri.

    The path is made absolute, its drive letter is normalized with
    :func:`normalize_drive`, it is converted to POSIX separators and then
    percent-encoded. A falsy *path* is returned unchanged.

    :param str path: A string pointing to or representing a local path
    :return: A `file://` uri for the same location
    :rtype: str

    >>> path_to_url("/home/user/code/myrepo/myfile.zip")
    'file:///home/user/code/myrepo/myfile.zip'
    """
    if not path:
        return path  # type: ignore
    normalized_path = Path(normalize_drive(os.path.abspath(path))).as_posix()
    # ``errors="backslashreplace"`` lets us handle half-surrogates that were
    # never actually part of a surrogate pair but were incidentally passed in
    # as a piece of a filename: they are kept as a backslash escape, which is
    # then percent-encoded like any other character.
    if os.name == "nt" and normalized_path[1] == ":":
        drive, _, tail = normalized_path.partition(":")
        quoted_tail = quote(tail, errors="backslashreplace")
        return "file:///{}:{}".format(drive, quoted_tail)
    # Quote the *normalized* path, not the raw argument, so relative paths and
    # ``Path`` objects produce a valid absolute file uri.
    return "file://{}".format(quote(normalized_path, errors="backslashreplace"))


def url_to_path(url):
    # type: (str) -> str
    """Convert a valid file url to a local filesystem path.

    Follows logic taken from pip's equivalent function.

    :param str url: A ``file:`` url
    :return: The local path the url points at
    :rtype: str
    :raises AssertionError: If *url* is not a file url
    """
    # NOTE: kept as an assert for backward compatibility with callers that
    # expect AssertionError; be aware it is stripped under ``python -O``.
    assert is_file_url(url), "Only file: urls can be converted to local paths"
    _, netloc, path, _, _ = urllib_parse.urlsplit(url)
    # Netlocs are UNC paths
    if netloc:
        netloc = "\\\\" + netloc

    # url2pathname() already percent-decodes its argument; decoding a second
    # time would corrupt names containing a literal "%" (e.g. "a%2541" would
    # become "aA" instead of "a%41").
    return urllib_request.url2pathname(netloc + path)


def is_valid_url(url):
    # type: (Union[str, bytes]) -> bool
    """Check whether the given string is a url.

    A value counts as a url when parsing it yields both a scheme and a
    network location.

    :param url: The candidate url, as text or bytes
    :return: True if *url* has a scheme and a netloc; False otherwise,
        including for empty or ``None`` input
    :rtype: bool
    """
    if not url:
        # Always hand back a real bool rather than echoing the falsy input.
        return False

    # Deferred so that the guard above does not need the project helper.
    from .misc import to_text

    pieces = urllib_parse.urlparse(to_text(url))
    return bool(pieces.scheme and pieces.netloc)


def is_file_url(url):
    # type: (Any) -> bool
    """Return True if the given url is a file url.

    :param url: A string, or an object exposing the url as a ``.url``
        attribute
    :return: Whether the (lower-cased) url has the ``file`` scheme; False for
        falsy input
    :rtype: bool
    :raises ValueError: If *url* is neither a ``str`` nor has a ``.url``
        attribute
    """
    if not url:
        return False
    if not isinstance(url, str):
        try:
            url = url.url
        except AttributeError:
            raise ValueError("Cannot parse url from unknown type: {!r}".format(url))

    # Deferred so that the guards above do not need the project helper.
    from .misc import to_text

    url = to_text(url, encoding="utf-8")
    return urllib_parse.urlparse(url.lower()).scheme == "file"


def is_readonly_path(fn):
    # type: (TPath) -> bool
    """Check if a provided path exists and is readonly.

    A path is reported as readonly when its mode lacks ``stat.S_IWRITE`` or
    when ``os.access(path, os.W_OK)`` is false.

    :param fn: The path to inspect
    :return: True if the path exists and is not writable, False otherwise
        (including when the path does not exist)
    :rtype: bool
    """
    if not os.path.exists(fn):
        return False
    mode = os.stat(fn).st_mode
    lacks_write_bit = not (mode & stat.S_IWRITE)
    return lacks_write_bit or not os.access(fn, os.W_OK)


def mkdir_p(newdir, mode=0o777):
    # type: (TPath, int) -> None
    """Recursively creates the target directory and all of its parents if they
    do not already exist.  Fails silently if they do.

    .. deprecated::
        Emits a :class:`DeprecationWarning`; use :func:`os.makedirs` instead.

    :param str newdir: The directory path to ensure
    :param int mode: The mode passed to :func:`os.makedirs`
    :raises: OSError if a file is encountered along the way
    """
    # The docstring must be the first statement of the function, otherwise
    # ``mkdir_p.__doc__`` is None.
    warnings.warn(
        (
            "This function is deprecated and will be removed in version 0.8. "
            "Use os.makedirs instead"
        ),
        DeprecationWarning,
        stacklevel=2,
    )
    if os.path.exists(newdir):
        if not os.path.isdir(newdir):
            raise OSError(
                "a file with the same name as the desired dir, '{}', already exists.".format(
                    newdir
                )
            )
        return None
    os.makedirs(newdir, mode)


def ensure_mkdir_p(mode=0o777):
    # type: (int) -> Callable[[Callable[..., Any]], Callable[..., Any]]
    """Decorator to ensure `mkdir_p` is called to the function's return
    value.

    .. deprecated::
        Emits a :class:`DeprecationWarning` each time the decorator factory
        is called.

    :param int mode: The mode forwarded to :func:`mkdir_p`
    :return: A decorator; the decorated function creates the directory named
        by its own return value and then returns that path
    """
    warnings.warn(
        "This function is deprecated and will be removed in version 0.8.",
        DeprecationWarning,
        stacklevel=2,
    )

    def decorator(f):
        # type: (Callable[..., Any]) -> Callable[..., Any]
        @functools.wraps(f)
        def decorated(*args, **kwargs):
            # type: (*Any, **Any) -> str
            path = f(*args, **kwargs)
            mkdir_p(path, mode=mode)
            return path

        return decorated

    return decorator


# Every TemporaryDirectory handed out by create_tracked_tempdir(); holding a
# reference here keeps the objects from being garbage collected early.
TRACKED_TEMPORARY_DIRECTORIES = []


def create_tracked_tempdir(*args, **kwargs):
    # type: (Any, Any) -> str
    """Create a tracked temporary directory.

    This uses `TemporaryDirectory`, but does not remove the directory when
    the return value goes out of scope, instead registers a handler to cleanup
    on program exit.

    All arguments are forwarded to `TemporaryDirectory`.

    :return: The path to the created directory
    :rtype: str
    """
    tempdir = TemporaryDirectory(*args, **kwargs)
    TRACKED_TEMPORARY_DIRECTORIES.append(tempdir)
    atexit.register(tempdir.cleanup)
    # NOTE: this installs a process-wide "ignore" filter for ResourceWarning
    # on every call -- presumably to silence TemporaryDirectory's implicit
    # cleanup warning.
    warnings.simplefilter("ignore", ResourceWarning)
    return tempdir.name


def create_tracked_tempfile(*args, **kwargs):
    # type: (Any, Any) -> Any
    """Create a named temporary file.

    This is a thin wrapper around `NamedTemporaryFile`; all arguments are
    forwarded to it unchanged. With that constructor's defaults the file is
    deleted as soon as the returned object is closed, so pass
    ``delete=False`` if the file must outlive it.

    :return: The open file object (its path is available as ``.name``)
    """
    return NamedTemporaryFile(*args, **kwargs)


def _find_icacls_exe():
    # type: () -> Optional[Text]
    if os.name == "nt":
        paths = [
            os.path.expandvars(r"%windir%\{0}").format(subdir)
            for subdir in ("system32", "SysWOW64")
        ]
        for path in paths:
            icacls_path = next(
                iter(fn for fn in os.listdir(path) if fn.lower() == "icacls.exe"), None
            )
            if icacls_path is not None:
                icacls_path = os.path.join(path, icacls_path)
                return icacls_path
    return None


def set_write_bit(fn: str) -> None:
    """Make the target path writable. Fail silently if the path doesn't
    exist.

    The path's mode is OR-ed with read/write/execute for user, group *and*
    other. On Windows, ``icacls`` is additionally asked to grant the current
    user access recursively; if that succeeds nothing else is done.
    Otherwise, for a non-directory the file flags of the path and of its
    parent directory are cleared where ``os.chflags`` is available, and for a
    directory every entry below it is processed the same way.

    :param str fn: The target filename or path
    :return: None
    """
    if not os.path.exists(fn):
        return
    file_stat = os.stat(fn).st_mode
    os.chmod(fn, file_stat | stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
    if os.name == "nt":
        # Project-local helpers, only needed (and importable) on Windows.
        from ._winconsole import get_current_user
        from .misc import run

        user_sid = get_current_user()
        icacls_exe = _find_icacls_exe() or "icacls"

        if user_sid:
            c = run(
                [
                    icacls_exe,
                    "''{}''".format(fn),
                    "/grant",
                    "{}:WD".format(user_sid),
                    "/T",
                    "/C",
                    "/Q",
                ],
                nospin=True,
                return_object=True,
                # 2020-06-12 Yukihiko Shinoda: use the locale's preferred
                # encoding. On a Shift-JIS Windows it reports "cp932", while
                # sys.getdefaultencoding() and sys.stdout.encoding both
                # report UTF-8. See
                # https://stackoverflow.com/questions/37506535/how-to-get-the-system-default-encoding-in-python-2-x
                encoding=locale.getpreferredencoding(),
            )
            if not c.err and c.returncode == 0:
                return

    if not os.path.isdir(fn):
        for path in [fn, os.path.dirname(fn)]:
            try:
                os.chflags(path, 0)
            except AttributeError:
                # os.chflags does not exist on this platform.
                pass
        return None
    # Bottom-up walk; each entry is handled by a recursive call, which also
    # re-walks sub-directories once their own permissions have been opened.
    for root, dirs, files in os.walk(fn, topdown=False):
        for name in dirs + files:
            set_write_bit(os.path.join(root, name))


def rmtree(directory: str,
           ignore_errors: bool = False,
           onerror: Optional[Callable] = None) -> None:
    """Stand-in for :func:`~shutil.rmtree` with additional error-handling.

    This version of `rmtree` handles read-only paths, especially in the case of index
    files written by certain source control systems.

    :param str directory: The target directory to remove
    :param bool ignore_errors: Whether to ignore errors, defaults to False
    :param func onerror: An error handling function, defaults to :func:`handle_remove_readonly`
    :raises OSError: Any removal error other than "no such file or directory"

    .. note::

       Setting `ignore_errors=True` may cause this to silently fail to delete the path
    """

    if onerror is None:
        onerror = handle_remove_readonly
    try:
        shutil.rmtree(directory, ignore_errors=ignore_errors, onerror=onerror)
    except OSError as exc:
        # IOError is an alias of OSError, and FileNotFoundError and
        # PermissionError are subclasses, so one clause covers them all.
        # Ignore removal failures where the file doesn't exist.
        if exc.errno != errno.ENOENT:
            raise


def _wait_for_files(path):  # pragma: no cover
    # type: (Union[str, TPath]) -> Optional[List[TPath]]
    """Retry with backoff up to 1 second to delete files from a directory.

    :param str path: The path to crawl to delete files from
    :return: A list of remaining paths or None
    :rtype: Optional[List[str]]
    """
    timeout = 0.001
    remaining = []
    while timeout < 1.0:
        remaining = []
        if os.path.isdir(path):
            L = os.listdir(path)
            for target in L:
                _remaining = _wait_for_files(target)
                if _remaining:
                    remaining.extend(_remaining)
            continue
        try:
            os.unlink(path)
        except FileNotFoundError as e:
            if e.errno == errno.ENOENT:
                return
        except (OSError, IOError, PermissionError):  # noqa:B014
            time.sleep(timeout)
            timeout *= 2
            remaining.append(path)
        else:
            return
    return remaining


def handle_remove_readonly(func, path, exc):
    # type: (Callable[..., str], TPath, Tuple[Type[OSError], OSError, TracebackType]) -> None
    """Error handler for shutil.rmtree.

    Windows source repo folders are read-only by default, so this error handler
    attempts to set them as writeable and then proceed with deletion.

    :param function func: The caller function
    :param str path: The target path for removal
    :param Exception exc: The raised exception, as a
        ``(type, value, traceback)`` triple
    :raises OSError: The original exception, when its errno is not a
        permission/missing-file error and the retry did not succeed

    This function will call check :func:`is_readonly_path` before attempting to call
    :func:`set_write_bit` on the target path and try again.
    """

    PERM_ERRORS = (errno.EACCES, errno.EPERM, errno.ENOENT)
    default_warning_message = "Unable to remove file due to permissions restriction: {!r}"
    # Only the exception instance of the (type, value, traceback) triple is used.
    _, exc_exception, _ = exc
    if is_readonly_path(path):
        # Apply write permission and call original function
        set_write_bit(path)
        try:
            func(path)
        except OSError as e:  # pragma: no cover
            if e.errno in PERM_ERRORS:
                if e.errno == errno.ENOENT:
                    return
                remaining = None
                if os.path.isdir(path):
                    remaining = _wait_for_files(path)
                if remaining:
                    warnings.warn(default_warning_message.format(path), ResourceWarning)
                else:
                    # Last best-effort attempt. ``func`` is whatever
                    # operation rmtree was performing and takes no
                    # ``ignore_errors`` keyword, so errors are swallowed here.
                    try:
                        func(path)
                    except OSError:
                        pass
                return
            # Any other errno falls through to the generic handling below.
        else:
            # The retry worked; do not fall through and re-raise the original
            # error for a path that has now been handled.
            return

    if exc_exception.errno in PERM_ERRORS:
        set_write_bit(path)
        _wait_for_files(path)
        try:
            func(path)
        except OSError as e:
            if e.errno in PERM_ERRORS and e.errno != errno.ENOENT:
                # File still exists
                warnings.warn(default_warning_message.format(path), ResourceWarning)
            return
    else:
        raise exc_exception


def walk_up(bottom):
    # type: (Union[TPath, str]) -> Generator[Tuple[str, List[str], List[str]], None, None]
    """Mimic os.walk, but walk 'up' instead of down the directory tree.

    Yields a ``(directory, dirnames, filenames)`` triple for *bottom* (after
    ``os.path.realpath``) and then for each of its ancestors in turn. The
    walk stops after the filesystem root, or silently as soon as a directory
    cannot be listed.

    From: https://gist.github.com/zdavkeos/1098474

    :param bottom: The directory to start from
    """
    current = os.path.realpath(str(bottom))
    while True:
        # Get files in current dir.
        try:
            names = os.listdir(current)
        except (OSError, ValueError):
            return

        dirs, nondirs = [], []
        for name in names:
            if os.path.isdir(os.path.join(current, name)):
                dirs.append(name)
            else:
                nondirs.append(name)
        yield current, dirs, nondirs

        parent = os.path.realpath(os.path.join(current, ".."))
        # See if we are at the top.
        if parent == current:
            return
        current = parent


def check_for_unc_path(path):
    # type: (Path) -> bool
    """Checks to see if a pathlib `Path` object is a unc path or not.

    Always False off Windows. On Windows the path's ``drive`` must be longer
    than two characters, must not start with a letter and must not have a
    colon as its second character.

    :param path: The `Path` object to inspect
    :rtype: bool
    """
    if os.name != "nt":
        return False
    drive = path.drive
    return len(drive) > 2 and not drive[0].isalpha() and drive[1] != ":"


def get_converted_relative_path(path, relative_to=None):
    # type: (TPath, Optional[TPath]) -> str
    """Convert `path` to be relative.

    Given a vague relative path, return the path relative to the given
    location.

    :param str path: The location of a target path
    :param str relative_to: The starting path to build against, optional
        (defaults to the current working directory)
    :returns: A relative posix-style path with a leading `./`
    :raises ValueError: If `path` does not lie inside `relative_to`

    This performs additional conversion to ensure the result is of POSIX form,
    and starts with `./`, or is precisely `.`.

    >>> os.chdir('/home/user/code/myrepo/myfolder')
    >>> vistir.path.get_converted_relative_path('/home/user/code/myrepo/myfolder/mysubfolder')
    './mysubfolder'
    >>> vistir.path.get_converted_relative_path('/home/user/code/myrepo/myfolder')
    '.'
    """
    from .misc import to_text

    if not relative_to:
        relative_to = os.getcwd()

    path = to_text(path, encoding="utf-8")
    relative_to = to_text(relative_to, encoding="utf-8")
    start_path = Path(relative_to)
    try:
        start = start_path.resolve()
    except OSError:
        start = start_path.absolute()

    # check if there is a drive letter or mount point
    # if it is a mountpoint use the original absolute path
    # instead of the unc path
    if check_for_unc_path(start):
        start = start_path.absolute()

    # relative_to() raises ValueError when the joined path is not located
    # underneath ``start``.
    path = start.joinpath(path).relative_to(start)

    # check and see if the path that was passed into the function is a UNC path
    # and raise value error if it is.
    # NOTE(review): ``path`` is already relative at this point, so its drive
    # is empty and this check looks unreachable -- confirm before relying on it.
    if check_for_unc_path(path):
        raise ValueError("The path argument does not currently accept UNC paths")

    relpath_s = to_text(posixpath.normpath(path.as_posix()))
    if not (relpath_s == "." or relpath_s.startswith("./")):
        relpath_s = posixpath.join(".", relpath_s)
    return relpath_s


def safe_expandvars(value):
    # type: (TPath) -> str
    """Call os.path.expandvars if value is a string, otherwise do nothing.

    :param value: The value to expand
    :return: The expanded string, or *value* itself when it is not a ``str``
    """
    if not isinstance(value, str):
        return value  # type: ignore
    return os.path.expandvars(value)