# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf-8 -*-
#
# Copyright 2016 Stefan Breunig <stefan-duplicity@breunig.xyz>
# Based on the backend onedrivebackend.py
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import json
import os.path
import re
import sys
import time
from io import DEFAULT_BUFFER_SIZE

import duplicity.backend
from duplicity import config
from duplicity import log
from duplicity.errors import BackendException


class ADBackend(duplicity.backend.Backend):
    """
    Backend for Amazon Drive. It communicates directly with Amazon Drive
    using their RESTful API and does not rely on externally set up software
    (like acd_cli).
    """

    OAUTH_TOKEN_PATH = os.path.expanduser("~/.duplicity_ad_oauthtoken.json")

    OAUTH_AUTHORIZE_URL = "https://www.amazon.com/ap/oa"
    OAUTH_TOKEN_URL = "https://api.amazon.com/auth/o2/token"
    # NOTE: Amazon requires https, which is why I am using my domain/setup
    # instead of Duplicity's. Mail me at stefan-duplicity@breunig.xyz once it
    # is available through https and I will whitelist the new URL.
    OAUTH_REDIRECT_URL = "https://breunig.xyz/duplicity/copy.html"
    OAUTH_SCOPE = ["clouddrive:read_other", "clouddrive:write"]

    CLIENT_ID = "amzn1.application-oa2-client.791c9c2d78444e85a32eb66f92eb6bcc"
    CLIENT_SECRET = "5b322c6a37b25f16d848a6a556eddcc30314fc46ae65c87068ff1bc4588d715b"

    MULTIPART_BOUNDARY = "DuplicityFormBoundaryd66364f7f8924f7e9d478e19cf4b871d114a1e00262542"

    def __init__(self, parsed_url):
        duplicity.backend.Backend.__init__(self, parsed_url)

        self.metadata_url = "https://drive.amazonaws.com/drive/v1/"
        self.content_url = "https://content-na.drive.amazonaws.com/cdproxy/"

        self.names_to_ids = {}
        self.backup_target_id = None
        self.backup_target = parsed_url.path.lstrip("/")

        if config.volsize > (10 * 1024 * 1024 * 1024):
            # https://forums.developer.amazon.com/questions/22713/file-size-limits.html
            # https://forums.developer.amazon.com/questions/22038/support-for-chunked-transfer-encoding.html
            log.FatalError(
                "Your --volsize is bigger than 10 GiB, which is the maximum "
                "file size on Amazon Drive that does not require workarounds."
            )
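        # requests and requests_oauthlib are imported lazily below so that
        # merely loading this backend module does not require them to be
        # installed; a missing dependency surfaces as a readable
        # BackendException instead of an ImportError at import time.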
        try:
            global requests
            global OAuth2Session
            import requests
            from requests_oauthlib import OAuth2Session
        except ImportError:
            raise BackendException(
                "Amazon Drive backend requires python-requests and "
                "python-requests-oauthlib to be installed.\n\n"
                "For Debian and derivatives use:\n"
                "  apt-get install python-requests python-requests-oauthlib\n"
                "For Fedora and derivatives use:\n"
                "  yum install python-requests python-requests-oauthlib"
            )

        self.initialize_oauth2_session()
        self.resolve_backup_target()

    def initialize_oauth2_session(self):
        """Set up or refresh the OAuth2 session with Amazon Drive"""

        def token_updater(token):
            """Store the OAuth2 token on disk"""
            try:
                with open(self.OAUTH_TOKEN_PATH, "w") as f:
                    json.dump(token, f)
            except Exception as err:
                log.Error(
                    f"Could not save the OAuth2 token to {self.OAUTH_TOKEN_PATH}. "
                    f"This means you may need to do the OAuth2 authorization "
                    f"process again soon. Original error: {err}"
                )

        token = None
        try:
            with open(self.OAUTH_TOKEN_PATH) as f:
                token = json.load(f)
        except IOError as err:
            log.Notice(f"Could not load OAuth2 token. Trying to create a new one. (original error: {err})")

        self.http_client = OAuth2Session(
            self.CLIENT_ID,
            scope=self.OAUTH_SCOPE,
            redirect_uri=self.OAUTH_REDIRECT_URL,
            token=token,
            auto_refresh_kwargs={
                "client_id": self.CLIENT_ID,
                "client_secret": self.CLIENT_SECRET,
            },
            auto_refresh_url=self.OAUTH_TOKEN_URL,
            token_updater=token_updater,
        )

        if token is not None:
            self.http_client.refresh_token(self.OAUTH_TOKEN_URL)
            endpoints_response = self.http_client.get(self.metadata_url + "account/endpoint")
            if endpoints_response.status_code != requests.codes.ok:
                token = None

        if token is None:
            if not sys.stdout.isatty() or not sys.stdin.isatty():
                log.FatalError(
                    f"The OAuth2 token could not be loaded from {self.OAUTH_TOKEN_PATH} "
                    f"and you are not running duplicity interactively, so duplicity "
                    f"cannot possibly access Amazon Drive."
                )
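            # First-time interactive authorization: the user opens the
            # authorization URL in a browser and pastes the redirect URL
            # back, from which fetch_token() extracts the authorization
            # code and exchanges it for an access/refresh token pair.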
            authorization_url, _ = self.http_client.authorization_url(self.OAUTH_AUTHORIZE_URL)

            print("")
            print(
                "In order to allow duplicity to access Amazon Drive, please "
                "open the following URL in a browser and copy the URL of the "
                "page you see after authorization here:"
            )
            print(authorization_url)
            print("")

            redirected_to = (input("URL of the resulting page: ").replace("http://", "https://", 1)).strip()

            token = self.http_client.fetch_token(
                self.OAUTH_TOKEN_URL,
                client_secret=self.CLIENT_SECRET,
                authorization_response=redirected_to,
            )

            endpoints_response = self.http_client.get(self.metadata_url + "account/endpoint")
            endpoints_response.raise_for_status()

            token_updater(token)

        urls = endpoints_response.json()
        if "metadataUrl" not in urls or "contentUrl" not in urls:
            log.FatalError("Could not retrieve endpoint URLs for this account")
        self.metadata_url = urls["metadataUrl"]
        self.content_url = urls["contentUrl"]

    def resolve_backup_target(self):
        """Resolve node id for remote backup target folder"""
        response = self.http_client.get(self.metadata_url + "nodes?filters=kind:FOLDER AND isRoot:true")
        parent_node_id = response.json()["data"][0]["id"]

        for component in [x for x in self.backup_target.split("/") if x]:
            # There doesn't seem to be escaping support, so cut off the filter
            # after the first unsupported character
            query = re.search("^[A-Za-z0-9_-]*", component).group(0)
            if component != query:
                query = query + "*"

            matches = self.read_all_pages(
                self.metadata_url + f"nodes?filters=kind:FOLDER AND name:{query} AND parents:{parent_node_id}"
            )
            candidates = [f for f in matches if f.get("name") == component]

            if len(candidates) >= 2:
                log.FatalError(
                    f"There are multiple folders with the same name below one parent.\n"
                    f"ParentID: {parent_node_id}\nFolderName: {component}"
                )
            elif len(candidates) == 1:
                parent_node_id = candidates[0]["id"]
            else:
                log.Debug(f"Folder {component} does not exist yet. Creating.")
                parent_node_id = self.mkdir(parent_node_id, component)

        log.Debug(f"Backup target folder has id: {parent_node_id}")
        self.backup_target_id = parent_node_id

    def get_file_id(self, remote_filename):
        """Find the id of a remote file in the backup target folder"""
        if remote_filename not in self.names_to_ids:
            self._list()
        return self.names_to_ids.get(remote_filename)

    def mkdir(self, parent_node_id, folder_name):
        """Create a new folder as a child of a parent node"""
        data = {"name": folder_name, "parents": [parent_node_id], "kind": "FOLDER"}
        response = self.http_client.post(self.metadata_url + "nodes", data=json.dumps(data))
        response.raise_for_status()
        return response.json()["id"]

    def multipart_stream(self, metadata, source_path):
        """Generator for multipart/form-data file upload from source file"""
        boundary = self.MULTIPART_BOUNDARY

        yield str.encode(
            f'--{boundary}\r\nContent-Disposition: form-data; name="metadata"\r\n\r\n'
            + f"{json.dumps(metadata)}\r\n"
            + f"--{boundary}\r\n"
        )
        yield b'Content-Disposition: form-data; name="content"; filename="i_love_backups"\r\n'
        yield b"Content-Type: application/octet-stream\r\n\r\n"

        with source_path.open() as stream:
            while True:
                f = stream.read(DEFAULT_BUFFER_SIZE)
                if f:
                    yield f
                else:
                    break

        # Anything after the closing boundary delimiter is multipart
        # "epilogue", which parsers must ignore (RFC 2046), so the trailing
        # "multipart/form-data; ..." text below is harmless.
        yield str.encode(f"\r\n--{boundary}--\r\n" + f"multipart/form-data; boundary={boundary}")
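    # For illustration only, the stream above serializes to roughly the
    # following wire format (boundary shortened):
    #
    #   --DuplicityFormBoundary...\r\n
    #   Content-Disposition: form-data; name="metadata"\r\n\r\n
    #   {"name": "...", "kind": "FILE", "parents": ["..."]}\r\n
    #   --DuplicityFormBoundary...\r\n
    #   Content-Disposition: form-data; name="content"; filename="i_love_backups"\r\n
    #   Content-Type: application/octet-stream\r\n\r\n
    #   <file bytes>\r\n
    #   --DuplicityFormBoundary...--\r\n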
    def read_all_pages(self, url):
        """Iterate over a nodes API URL until all pages have been read"""
        result = []
        next_token = ""
        token_param = "&startToken=" if "?" in url else "?startToken="

        while True:
            paginated_url = url + token_param + next_token
            response = self.http_client.get(paginated_url)
            if response.status_code != 200:
                raise BackendException(f"Pagination failed with status={response.status_code} on URL={url}")

            parsed = response.json()
            if "data" in parsed and len(parsed["data"]) > 0:
                result.extend(parsed["data"])
            else:
                break

            # Do not make another HTTP request if everything is here already
            if len(result) >= parsed["count"]:
                break

            if "nextToken" not in parsed:
                break
            next_token = parsed["nextToken"]

        return result

    def raise_for_existing_file(self, remote_filename):
        """Delete a file that already exists at the target and report the error"""
        self._delete(remote_filename)
        raise BackendException(
            f"Upload failed, because there was a file with the same name as {remote_filename} "
            f"already present. The file was deleted, and duplicity will retry the upload "
            f"unless the retry limit has been reached."
        )
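    # _put below proceeds in three steps: a quota pre-check against the
    # account metadata, a cached-listing lookup that deletes any stale copy
    # of the volume, and one streaming multipart POST. 408/504 responses get
    # speculative polling because Amazon Drive may finish an upload
    # server-side even after reporting a timeout to the client.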
    def _put(self, source_path, remote_filename):
        """Upload a local file to Amazon Drive"""
        quota = self.http_client.get(self.metadata_url + "account/quota")
        quota.raise_for_status()
        available = quota.json()["available"]
        source_size = os.path.getsize(source_path.name)

        if source_size > available:
            raise BackendException(
                f'Out of space: trying to store "{source_path.name}" ({int(source_size)} bytes), '
                f"but only {int(available)} bytes available on Amazon Drive."
            )

        # Just check the cached list, to avoid _list for every new file being
        # uploaded
        if remote_filename in self.names_to_ids:
            log.Debug(
                f"File {remote_filename} seems to already exist on Amazon Drive. "
                f"Deleting before attempting to upload it again."
            )
            self._delete(remote_filename)

        metadata = {
            "name": remote_filename,
            "kind": "FILE",
            "parents": [self.backup_target_id],
        }
        headers = {"Content-Type": f"multipart/form-data; boundary={self.MULTIPART_BOUNDARY}"}
        data = self.multipart_stream(metadata, source_path)

        response = self.http_client.post(
            self.content_url + "nodes?suppress=deduplication",
            data=data,
            headers=headers,
        )

        if response.status_code == 409:  # "409 : Duplicate file exists."
            self.raise_for_existing_file(remote_filename)
        elif response.status_code == 201:
            log.Debug(f"{remote_filename} uploaded successfully")
        elif response.status_code == 408 or response.status_code == 504:
            log.Info(
                f"{remote_filename} upload failed with timeout status code={int(response.status_code)}. "
                f"Speculatively waiting for {int(config.timeout)} seconds to see if Amazon Drive "
                f"finished the upload anyway"
            )
            tries = config.timeout / 15
            while tries >= 0:
                tries -= 1
                time.sleep(15)

                remote_size = self._query(remote_filename)["size"]
                if source_size == remote_size:
                    log.Debug("Upload turned out to be successful after all.")
                    return
                elif remote_size == -1:
                    log.Debug(f"Uploaded file is not yet there, {int(tries + 1)} tries left.")
                    continue
                else:
                    self.raise_for_existing_file(remote_filename)
            raise BackendException(f"{remote_filename} upload failed and file did not show up within time limit.")
        else:
            log.Debug(f"{remote_filename} upload returned an undesirable status code {response.status_code}")
            response.raise_for_status()

        parsed = response.json()
        if "id" not in parsed:
            raise BackendException(
                f"{remote_filename} was uploaded but returned JSON does not contain ID of new file. "
                f"Retrying.\nJSON:\n\n{parsed}"
            )

        # XXX: The upload may be considered finished before the file shows up
        # in the file listing. As such, the following is required to avoid
        # race conditions when duplicity calls _query or _list.
        self.names_to_ids[parsed["name"]] = parsed["id"]

    def _get(self, remote_filename, local_path):
        """Download file from Amazon Drive"""
        with local_path.open("wb") as local_file:
            file_id = self.get_file_id(remote_filename)
            if file_id is None:
                raise BackendException(f'File "{remote_filename}" cannot be downloaded: it does not exist')

            response = self.http_client.get(self.content_url + "/nodes/" + file_id + "/content", stream=True)
            response.raise_for_status()
            for chunk in response.iter_content(chunk_size=DEFAULT_BUFFER_SIZE):
                if chunk:
                    local_file.write(chunk)
            local_file.flush()

    def _query(self, remote_filename):
        """Retrieve file size info from Amazon Drive"""
        file_id = self.get_file_id(remote_filename)
        if file_id is None:
            return {"size": -1}
        response = self.http_client.get(self.metadata_url + "nodes/" + file_id)
        response.raise_for_status()

        return {"size": response.json()["contentProperties"]["size"]}

    def _list(self):
        """List files in Amazon Drive backup folder"""
        files = self.read_all_pages(
            self.metadata_url + "nodes/" + self.backup_target_id + "/children?filters=kind:FILE"
        )

        self.names_to_ids = {f["name"]: f["id"] for f in files}

        return list(self.names_to_ids.keys())

    def _delete(self, remote_filename):
        """Delete file from Amazon Drive"""
        file_id = self.get_file_id(remote_filename)
        if file_id is None:
            raise BackendException(f'File "{remote_filename}" cannot be deleted: it does not exist')

        response = self.http_client.put(self.metadata_url + "trash/" + file_id)
        response.raise_for_status()
        del self.names_to_ids[remote_filename]


duplicity.backend.register_backend("ad", ADBackend)
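# Hypothetical usage sketch (the local path and target folder are made-up
# examples), relying only on the "ad" scheme registered above:
#
#   duplicity /home/me/data "ad:///backups/myhost"
#
# On first use duplicity prints an authorization URL; after the user pastes
# the redirect URL back, the OAuth2 token is cached in
# ~/.duplicity_ad_oauthtoken.json for later runs.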