From c2a9c717722257f262d0ce0f5463eb8973d55145 Mon Sep 17 00:00:00 2001 From: myl7 Date: Mon, 11 Dec 2023 14:23:20 +0800 Subject: [PATCH 01/10] Remove unused files and format files --- .circleci/config.yml | 2458 ----------------- .../unittest/linux/scripts/environment.yml | 15 - .circleci/unittest/linux/scripts/install.sh | 29 - .../unittest/linux/scripts/post_process.sh | 8 - .circleci/unittest/linux/scripts/run_test.sh | 9 - .circleci/unittest/linux/scripts/setup_env.sh | 39 - .../unittest/windows/scripts/environment.yml | 15 - .circleci/unittest/windows/scripts/install.sh | 31 - .../windows/scripts/install_conda.bat | 1 - .../unittest/windows/scripts/post_process.sh | 8 - .../unittest/windows/scripts/run_test.sh | 9 - .../unittest/windows/scripts/setup_env.sh | 39 - .../windows/scripts/vc_env_helper.bat | 39 - .github/csprng_architecture.png | Bin 92773 -> 0 bytes .gitignore | 9 +- CODE_OF_CONDUCT.md | 76 - CONTRIBUTING.md | 31 - README.md | 105 +- examples/csprng.ipynb | 226 -- examples/encrypt_decrypt.ipynb | 307 -- packaging/README.md | 90 - packaging/build_conda.sh | 14 - packaging/build_wheel.sh | 54 - packaging/conda/build_csprng.sh | 229 -- packaging/conda/install_conda.bat | 1 - packaging/conda/switch_cuda_version.sh | 28 - packaging/pkg_helpers.bash | 382 --- packaging/torchcsprng/bld.bat | 27 - packaging/torchcsprng/conda_build_config.yaml | 26 - packaging/torchcsprng/meta.yaml | 56 - packaging/vs2017/activate.bat | 44 - packaging/vs2017/conda_build_config.yaml | 24 - packaging/vs2017/install_activate.bat | 30 - packaging/vs2017/install_runtime.bat | 49 - packaging/vs2017/meta.yaml | 24 - packaging/vs2019/activate.bat | 44 - packaging/vs2019/conda_build_config.yaml | 24 - packaging/vs2019/install_activate.bat | 30 - packaging/vs2019/install_runtime.bat | 49 - packaging/vs2019/meta.yaml | 24 - packaging/wheel/linux_manywheel.sh | 62 - packaging/wheel/osx_wheel.sh | 52 - packaging/wheel/relocate.py | 408 --- packaging/windows/azure-pipelines-ci.yml | 11 
- packaging/windows/azure-pipelines.yml | 35 - packaging/windows/build_csprng.bat | 145 - packaging/windows/cpu.bat | 37 - packaging/windows/cuda101.bat | 59 - packaging/windows/cuda102.bat | 59 - packaging/windows/cuda92.bat | 59 - packaging/windows/internal/auth.bat | 46 - packaging/windows/internal/build_conda.bat | 15 - packaging/windows/internal/build_wheels.bat | 12 - packaging/windows/internal/check_deps.bat | 67 - packaging/windows/internal/check_opts.bat | 33 - packaging/windows/internal/clean.bat | 5 - packaging/windows/internal/clone.bat | 56 - packaging/windows/internal/copy.bat | 13 - packaging/windows/internal/copy_cpu.bat | 1 - packaging/windows/internal/cuda_install.bat | 201 -- packaging/windows/internal/dep_install.bat | 14 - packaging/windows/internal/env_fix.bat | 31 - .../windows/internal/nightly_defaults.bat | 200 -- packaging/windows/internal/publish.bat | 89 - packaging/windows/internal/setup.bat | 44 - packaging/windows/internal/test.bat | 79 - packaging/windows/internal/upload.bat | 96 - packaging/windows/internal/vc_env_helper.bat | 43 - .../windows/internal/vc_install_helper.sh | 16 - packaging/windows/internal/vs2017_install.ps1 | 25 - packaging/windows/internal/vs2019_install.ps1 | 21 - packaging/windows/internal/vs_install.bat | 14 - packaging/windows/old/cuda100.bat | 59 - packaging/windows/old/cuda90.bat | 59 - packaging/windows/templates/auth_task.yml | 17 - packaging/windows/templates/build_conda.yml | 15 - packaging/windows/templates/build_task.yml | 173 -- packaging/windows/templates/build_wheels.yml | 9 - .../windows/templates/linux_build_task.yml | 38 - .../templates/override_pytorch_version.yml | 6 - .../windows/templates/publish_packages.yml | 8 - .../templates/publish_test_results.yml | 6 - .../templates/setup_env_for_msagent.yml | 25 - .../templates/setup_nightly_variables.yml | 11 - .../windows/templates/upload_to_conda.yml | 10 - packaging/windows/templates/upload_to_s3.yml | 15 - 
packaging/windows/templates/vsts_auth.yml | 8 - test/__init__.py | 4 - test/test_csprng.py | 654 ----- torchcsprng/__init__.py | 14 - torchcsprng/__init__.pyi | 14 - 91 files changed, 61 insertions(+), 7835 deletions(-) delete mode 100644 .circleci/config.yml delete mode 100644 .circleci/unittest/linux/scripts/environment.yml delete mode 100755 .circleci/unittest/linux/scripts/install.sh delete mode 100755 .circleci/unittest/linux/scripts/post_process.sh delete mode 100755 .circleci/unittest/linux/scripts/run_test.sh delete mode 100755 .circleci/unittest/linux/scripts/setup_env.sh delete mode 100644 .circleci/unittest/windows/scripts/environment.yml delete mode 100644 .circleci/unittest/windows/scripts/install.sh delete mode 100644 .circleci/unittest/windows/scripts/install_conda.bat delete mode 100644 .circleci/unittest/windows/scripts/post_process.sh delete mode 100644 .circleci/unittest/windows/scripts/run_test.sh delete mode 100644 .circleci/unittest/windows/scripts/setup_env.sh delete mode 100644 .circleci/unittest/windows/scripts/vc_env_helper.bat delete mode 100644 .github/csprng_architecture.png delete mode 100644 CODE_OF_CONDUCT.md delete mode 100644 CONTRIBUTING.md delete mode 100644 examples/csprng.ipynb delete mode 100644 examples/encrypt_decrypt.ipynb delete mode 100644 packaging/README.md delete mode 100755 packaging/build_conda.sh delete mode 100755 packaging/build_wheel.sh delete mode 100755 packaging/conda/build_csprng.sh delete mode 100644 packaging/conda/install_conda.bat delete mode 100755 packaging/conda/switch_cuda_version.sh delete mode 100644 packaging/pkg_helpers.bash delete mode 100644 packaging/torchcsprng/bld.bat delete mode 100644 packaging/torchcsprng/conda_build_config.yaml delete mode 100644 packaging/torchcsprng/meta.yaml delete mode 100644 packaging/vs2017/activate.bat delete mode 100644 packaging/vs2017/conda_build_config.yaml delete mode 100644 packaging/vs2017/install_activate.bat delete mode 100644 
packaging/vs2017/install_runtime.bat delete mode 100644 packaging/vs2017/meta.yaml delete mode 100644 packaging/vs2019/activate.bat delete mode 100644 packaging/vs2019/conda_build_config.yaml delete mode 100644 packaging/vs2019/install_activate.bat delete mode 100644 packaging/vs2019/install_runtime.bat delete mode 100644 packaging/vs2019/meta.yaml delete mode 100644 packaging/wheel/linux_manywheel.sh delete mode 100644 packaging/wheel/osx_wheel.sh delete mode 100644 packaging/wheel/relocate.py delete mode 100644 packaging/windows/azure-pipelines-ci.yml delete mode 100644 packaging/windows/azure-pipelines.yml delete mode 100644 packaging/windows/build_csprng.bat delete mode 100644 packaging/windows/cpu.bat delete mode 100644 packaging/windows/cuda101.bat delete mode 100644 packaging/windows/cuda102.bat delete mode 100644 packaging/windows/cuda92.bat delete mode 100644 packaging/windows/internal/auth.bat delete mode 100644 packaging/windows/internal/build_conda.bat delete mode 100644 packaging/windows/internal/build_wheels.bat delete mode 100644 packaging/windows/internal/check_deps.bat delete mode 100644 packaging/windows/internal/check_opts.bat delete mode 100644 packaging/windows/internal/clean.bat delete mode 100644 packaging/windows/internal/clone.bat delete mode 100644 packaging/windows/internal/copy.bat delete mode 100644 packaging/windows/internal/copy_cpu.bat delete mode 100644 packaging/windows/internal/cuda_install.bat delete mode 100644 packaging/windows/internal/dep_install.bat delete mode 100644 packaging/windows/internal/env_fix.bat delete mode 100644 packaging/windows/internal/nightly_defaults.bat delete mode 100644 packaging/windows/internal/publish.bat delete mode 100644 packaging/windows/internal/setup.bat delete mode 100644 packaging/windows/internal/test.bat delete mode 100644 packaging/windows/internal/upload.bat delete mode 100644 packaging/windows/internal/vc_env_helper.bat delete mode 100644 packaging/windows/internal/vc_install_helper.sh 
delete mode 100644 packaging/windows/internal/vs2017_install.ps1 delete mode 100644 packaging/windows/internal/vs2019_install.ps1 delete mode 100644 packaging/windows/internal/vs_install.bat delete mode 100644 packaging/windows/old/cuda100.bat delete mode 100644 packaging/windows/old/cuda90.bat delete mode 100644 packaging/windows/templates/auth_task.yml delete mode 100644 packaging/windows/templates/build_conda.yml delete mode 100644 packaging/windows/templates/build_task.yml delete mode 100644 packaging/windows/templates/build_wheels.yml delete mode 100644 packaging/windows/templates/linux_build_task.yml delete mode 100644 packaging/windows/templates/override_pytorch_version.yml delete mode 100644 packaging/windows/templates/publish_packages.yml delete mode 100644 packaging/windows/templates/publish_test_results.yml delete mode 100644 packaging/windows/templates/setup_env_for_msagent.yml delete mode 100644 packaging/windows/templates/setup_nightly_variables.yml delete mode 100644 packaging/windows/templates/upload_to_conda.yml delete mode 100644 packaging/windows/templates/upload_to_s3.yml delete mode 100644 packaging/windows/templates/vsts_auth.yml delete mode 100644 test/__init__.py delete mode 100644 test/test_csprng.py delete mode 100644 torchcsprng/__init__.py delete mode 100644 torchcsprng/__init__.pyi diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index acd6ffa..0000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,2458 +0,0 @@ -version: 2.1 - -executors: - windows-cpu: - machine: - resource_class: windows.xlarge - image: windows-server-2019-vs2019:stable - shell: bash.exe - - windows-gpu: - machine: - resource_class: windows.gpu.nvidia.medium - image: windows-server-2019-nvidia:stable - shell: bash.exe - -commands: - - checkout_merge: - description: "checkout merge branch" - steps: - - checkout - # - run: - # name: Checkout merge branch - # command: | - # set -ex - # BRANCH=$(git rev-parse --abbrev-ref HEAD) - # if [[ 
"$BRANCH" != "master" ]]; then - # git fetch --force origin ${CIRCLE_BRANCH}/merge:merged/${CIRCLE_BRANCH} - # git checkout "merged/$CIRCLE_BRANCH" - # fi - - designate_upload_channel: - description: "inserts the correct upload channel into ${BASH_ENV}" - steps: - - run: - name: adding UPLOAD_CHANNEL to BASH_ENV - command: | - our_upload_channel=nightly - # On tags upload to test instead - if [[ -n "${CIRCLE_TAG}" ]]; then - our_upload_channel=test - fi - echo "export UPLOAD_CHANNEL=${our_upload_channel}" >> ${BASH_ENV} - -binary_common: &binary_common - parameters: - # Edit these defaults to do a release` - build_version: - description: "version number of release binary; by default, build a nightly" - type: string - default: "" - pytorch_version: - description: "PyTorch version to build against; by default, use a nightly" - type: string - default: "" - # Don't edit these - python_version: - description: "Python version to build against (e.g., 3.7)" - type: string - cu_version: - description: "CUDA version to build against, in CU format (e.g., cpu or cu100)" - type: string - unicode_abi: - description: "Python 2.7 wheel only: whether or not we are cp27mu (default: no)" - type: string - default: "" - wheel_docker_image: - description: "Wheel only: what docker image to use" - type: string - default: "pytorch/manylinux-cuda101" - environment: - PYTHON_VERSION: << parameters.python_version >> - PYTORCH_VERSION: << parameters.pytorch_version >> - UNICODE_ABI: << parameters.unicode_abi >> - CU_VERSION: << parameters.cu_version >> - -jobs: - - binary_linux_wheel: - <<: *binary_common - docker: - - image: << parameters.wheel_docker_image >> - resource_class: 2xlarge+ - steps: - - checkout_merge - - run: packaging/build_wheel.sh - - store_artifacts: - path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - binary_linux_conda: - <<: *binary_common - docker: - - image: "pytorch/conda-cuda" - resource_class: 2xlarge+ - steps: - - checkout_merge - - run: - 
no_output_timeout: 20m - command: packaging/build_conda.sh - - store_artifacts: - path: /opt/conda/conda-bld/linux-64 - - persist_to_workspace: - root: /opt/conda/conda-bld/linux-64 - paths: - - "*" - - store_test_results: - path: build_results/ - - binary_win_conda: - <<: *binary_common - executor: windows-cpu - steps: - - checkout_merge - - run: - name: Build conda packages - no_output_timeout: 20m - command: | - set -ex - source packaging/windows/internal/vc_install_helper.sh - packaging/windows/internal/cuda_install.bat - eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - packaging/build_conda.sh - rm /C/tools/miniconda3/conda-bld/win-64/vs${VC_YEAR}*.tar.bz2 - - store_artifacts: - path: C:/tools/miniconda3/conda-bld/win-64 - - persist_to_workspace: - root: C:/tools/miniconda3/conda-bld/win-64 - paths: - - "*" - - store_test_results: - path: build_results/ - - binary_win_wheel: - <<: *binary_common - executor: windows-cpu - steps: - - checkout_merge - - run: - name: Build wheel packages - command: | - set -ex - source packaging/windows/internal/vc_install_helper.sh - packaging/windows/internal/cuda_install.bat - packaging/build_wheel.sh - - store_artifacts: - path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - store_test_results: - path: build_results/ - - binary_macos_wheel: - <<: *binary_common - macos: - xcode: "12.0" - steps: - - checkout_merge -# - run: -# name: Install libomp -# command: HOMEBREW_NO_AUTO_UPDATE=1 brew install libomp -# # Disable brew auto update which is very slow - - run: - # Cannot easily deduplicate this as source'ing activate - # will set environment variables which we need to propagate - # to build_wheel.sh - command: | - curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh - sh conda.sh -b - source $HOME/miniconda3/bin/activate - packaging/build_wheel.sh - - store_artifacts: 
- path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - binary_macos_conda: - <<: *binary_common - macos: - xcode: "12.0" - steps: - - checkout_merge -# - run: -# name: Install libomp -# command: HOMEBREW_NO_AUTO_UPDATE=1 brew install libomp -# # Disable brew auto update which is very slow - - run: - command: | - curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh - sh conda.sh -b - source $HOME/miniconda3/bin/activate - conda install -yq conda-build - packaging/build_conda.sh - - store_artifacts: - path: /Users/distiller/miniconda3/conda-bld/osx-64 - - persist_to_workspace: - root: /Users/distiller/miniconda3/conda-bld/osx-64 - paths: - - "*" - - store_test_results: - path: build_results/ - - # Requires org-member context - binary_conda_upload: - docker: - - image: continuumio/miniconda - steps: - - attach_workspace: - at: ~/workspace - - designate_upload_channel - - run: - command: | - # Prevent credential from leaking - conda install -yq anaconda-client - set -x - anaconda -t "${CONDA_PYTORCHBOT_TOKEN}" upload ~/workspace/*.tar.bz2 -u "pytorch-${UPLOAD_CHANNEL}" --label main --no-progress --force - # Requires org-member context - binary_wheel_upload: - parameters: - subfolder: - description: "What whl subfolder to upload to, e.g., blank or cu100/ (trailing slash is important)" - type: string - docker: - - image: circleci/python:3.7 - steps: - - attach_workspace: - at: ~/workspace - - designate_upload_channel - - checkout - - run: - command: | - pip install --user awscli - export PATH="$HOME/.local/bin:$PATH" - # Prevent credential from leaking - set +x - export AWS_ACCESS_KEY_ID="${PYTORCH_BINARY_AWS_ACCESS_KEY_ID}" - export AWS_SECRET_ACCESS_KEY="${PYTORCH_BINARY_AWS_SECRET_ACCESS_KEY}" - set -x - for pkg in ~/workspace/*.whl; do - aws s3 cp "$pkg" "s3://pytorch/whl/${UPLOAD_CHANNEL}/<< parameters.subfolder >>" --acl public-read - done - - unittest_linux_cpu: - <<: *binary_common - docker: - - image: 
"pytorch/manylinux-cuda102" - resource_class: 2xlarge+ - steps: - - checkout - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. - command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - - keys: - - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - - run: - name: Setup - command: .circleci/unittest/linux/scripts/setup_env.sh - - save_cache: - - key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - paths: - - conda - - env - - run: - name: Install torchcsprng - command: .circleci/unittest/linux/scripts/install.sh - - run: - name: Run tests - command: .circleci/unittest/linux/scripts/run_test.sh - - run: - name: Post process - command: .circleci/unittest/linux/scripts/post_process.sh - - store_test_results: - path: test-results - - unittest_linux_gpu: - <<: *binary_common - machine: - image: ubuntu-1604-cuda-10.1:201909-23 - resource_class: gpu.nvidia.small.multi - environment: - image_name: "pytorch/manylinux-cuda101" - PYTHON_VERSION: << parameters.python_version >> - steps: - - checkout - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. 
- command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - - keys: - - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - - run: - name: Setup - command: docker run -e PYTHON_VERSION -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh - - save_cache: - - key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - paths: - - conda - - env - - run: - name: Install torchcsprng - command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh - - run: - name: Run tests - command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/run_test.sh - - run: - name: Post Process - command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/post_process.sh - - store_test_results: - path: test-results - - unittest_windows_cpu: - <<: *binary_common - executor: - name: windows-cpu - steps: - - checkout - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. 
- command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - - keys: - - env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - - run: - name: Setup - command: .circleci/unittest/windows/scripts/setup_env.sh - - save_cache: - - key: env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - paths: - - conda - - env - - run: - name: Install torchcsprng - command: .circleci/unittest/windows/scripts/install.sh - - run: - name: Run tests - command: .circleci/unittest/windows/scripts/run_test.sh - - run: - name: Post process - command: .circleci/unittest/windows/scripts/post_process.sh - - store_test_results: - path: test-results - - unittest_windows_gpu: - <<: *binary_common - executor: - name: windows-gpu - environment: - CUDA_VERSION: "10.1" - PYTHON_VERSION: << parameters.python_version >> - steps: - - checkout - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. 
- command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - - keys: - - env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - - run: - name: Setup - command: .circleci/unittest/windows/scripts/setup_env.sh - - save_cache: - - key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - paths: - - conda - - env - - run: - name: Install torchcsprng - command: .circleci/unittest/windows/scripts/install.sh - - run: - name: Run tests - command: .circleci/unittest/windows/scripts/run_test.sh - - run: - name: Post process - command: .circleci/unittest/windows/scripts/post_process.sh - - store_test_results: - path: test-results - - unittest_macos_cpu: - <<: *binary_common - macos: - xcode: "12.0" - resource_class: large - steps: - - checkout - - designate_upload_channel - - run: - name: Install wget - command: HOMEBREW_NO_AUTO_UPDATE=1 brew install wget - # Disable brew auto update which is very slow -# - run: -# name: Install libomp -# command: HOMEBREW_NO_AUTO_UPDATE=1 brew install libomp -# # Disable brew auto update which is very slow - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. 
- command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - - keys: - - env-v3-macos-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - - run: - name: Setup - command: .circleci/unittest/linux/scripts/setup_env.sh - - save_cache: - - key: env-v3-macos-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - paths: - - conda - - env - - run: - name: Install torchcsprng - command: .circleci/unittest/linux/scripts/install.sh - - run: - name: Run tests - command: .circleci/unittest/linux/scripts/run_test.sh - - run: - name: Post process - command: .circleci/unittest/linux/scripts/post_process.sh - - store_test_results: - path: test-results - -workflows: - build: - jobs: -# - circleci_consistency - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.6_cu101 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_wheel: - cu_version: cu102 - name: binary_linux_wheel_py3.6_cu102 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu111 - name: binary_linux_wheel_py3.6_cu111 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.7_cu101 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_wheel: - cu_version: cu102 - name: binary_linux_wheel_py3.7_cu102 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - 
binary_linux_wheel: - cu_version: cu111 - name: binary_linux_wheel_py3.7_cu111 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.8_cu101 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_wheel: - cu_version: cu102 - name: binary_linux_wheel_py3.8_cu102 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu111 - name: binary_linux_wheel_py3.8_cu111 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.9_cu101 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_wheel: - cu_version: cu102 - name: binary_linux_wheel_py3.9_cu102 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu111 - name: binary_linux_wheel_py3.9_cu111 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - 
binary_win_wheel: - cu_version: cpu - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.6_cpu - python_version: '3.6' - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.6_cu101 - python_version: '3.6' - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.6_cu102 - python_version: '3.6' - - binary_win_wheel: - cu_version: cu111 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.6_cu111 - python_version: '3.6' - - binary_win_wheel: - cu_version: cpu - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.7_cpu - python_version: '3.7' - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.7_cu101 - python_version: '3.7' - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.7_cu102 - python_version: '3.7' - - binary_win_wheel: - cu_version: cu111 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.7_cu111 - python_version: '3.7' - - binary_win_wheel: - cu_version: cpu - name: binary_win_wheel_py3.8_cpu - python_version: '3.8' - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.8_cu101 - python_version: '3.8' - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.8_cu102 - python_version: '3.8' - - binary_win_wheel: - cu_version: cu111 
- name: binary_win_wheel_py3.8_cu111 - python_version: '3.8' - - binary_win_wheel: - cu_version: cpu - name: binary_win_wheel_py3.9_cpu - python_version: '3.9' - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.9_cu101 - python_version: '3.9' - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.9_cu102 - python_version: '3.9' - - binary_win_wheel: - cu_version: cu111 - name: binary_win_wheel_py3.9_cu111 - python_version: '3.9' - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.6_cu101 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_conda: - cu_version: cu102 - name: binary_linux_conda_py3.6_cu102 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu111 - name: binary_linux_conda_py3.6_cu111 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.7_cu101 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_conda: - cu_version: cu102 - name: binary_linux_conda_py3.7_cu102 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu111 - name: binary_linux_conda_py3.7_cu111 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.8_cpu - python_version: '3.8' - wheel_docker_image: 
pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.8_cu101 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_conda: - cu_version: cu102 - name: binary_linux_conda_py3.8_cu102 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu111 - name: binary_linux_conda_py3.8_cu111 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.9_cu101 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_conda: - cu_version: cu102 - name: binary_linux_conda_py3.9_cu102 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu111 - name: binary_linux_conda_py3.9_cu111 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 -# - binary_macos_conda: -# cu_version: cpu -# name: binary_macos_conda_py3.9_cpu -# python_version: '3.9' -# wheel_docker_image: pytorch/manylinux-cuda102 - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.6_cpu - python_version: '3.6' - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: 
/v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.6_cu101 - python_version: '3.6' - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.6_cu102 - python_version: '3.6' - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.6_cu111 - python_version: '3.6' - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.7_cpu - python_version: '3.7' - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.7_cu101 - python_version: '3.7' - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.7_cu102 - python_version: '3.7' - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.7_cu111 - python_version: '3.7' - - binary_win_conda: - cu_version: cpu - name: binary_win_conda_py3.8_cpu - python_version: '3.8' - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.8_cu101 - python_version: '3.8' - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.8_cu102 - python_version: '3.8' - - binary_win_conda: - cu_version: cu111 - name: binary_win_conda_py3.8_cu111 - python_version: '3.8' - - binary_win_conda: - cu_version: cpu - name: binary_win_conda_py3.9_cpu - python_version: '3.9' - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: 
/v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.9_cu101 - python_version: '3.9' - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.9_cu102 - python_version: '3.9' - - binary_win_conda: - cu_version: cu111 - name: binary_win_conda_py3.9_cu111 - python_version: '3.9' -# - python_lint -# - python_type_check -# - clang_format - - unittest: - jobs: - - unittest_linux_cpu: - cu_version: cpu - name: unittest_linux_cpu_py3.6 - python_version: '3.6' - - unittest_linux_cpu: - cu_version: cpu - name: unittest_linux_cpu_py3.7 - python_version: '3.7' - - unittest_linux_cpu: - cu_version: cpu - name: unittest_linux_cpu_py3.8 - python_version: '3.8' - - unittest_linux_gpu: - cu_version: cu101 - filters: - branches: - only: - - master - - nightly - name: unittest_linux_gpu_py3.6 - python_version: '3.6' - - unittest_linux_gpu: - cu_version: cu101 - filters: - branches: - only: - - master - - nightly - name: unittest_linux_gpu_py3.7 - python_version: '3.7' - - unittest_linux_gpu: - cu_version: cu101 - name: unittest_linux_gpu_py3.8 - python_version: '3.8' - - unittest_linux_gpu: - cu_version: cu101 - name: unittest_linux_gpu_py3.9 - python_version: '3.9' - - unittest_windows_cpu: - cu_version: cpu - name: unittest_windows_cpu_py3.6 - python_version: '3.6' - - unittest_windows_cpu: - cu_version: cpu - name: unittest_windows_cpu_py3.7 - python_version: '3.7' - - unittest_windows_cpu: - cu_version: cpu - name: unittest_windows_cpu_py3.8 - python_version: '3.8' - - unittest_windows_cpu: - cu_version: cpu - name: unittest_windows_cpu_py3.9 - python_version: '3.9' - - unittest_windows_gpu: - cu_version: cu101 - filters: - branches: - only: - - master - - nightly - name: unittest_windows_gpu_py3.6 - python_version: '3.6' - - unittest_windows_gpu: - cu_version: cu101 - filters: - branches: - only: - - master - - nightly - name: unittest_windows_gpu_py3.7 - python_version: '3.7' 
- - unittest_windows_gpu: - cu_version: cu101 - name: unittest_windows_gpu_py3.8 - python_version: '3.8' - - unittest_windows_gpu: - cu_version: cu101 - name: unittest_windows_gpu_py3.9 - python_version: '3.9' - - unittest_macos_cpu: - cu_version: cpu - name: unittest_macos_cpu_py3.6 - python_version: '3.6' - - unittest_macos_cpu: - cu_version: cpu - name: unittest_macos_cpu_py3.7 - python_version: '3.7' - - unittest_macos_cpu: - cu_version: cpu - name: unittest_macos_cpu_py3.8 - python_version: '3.8' -# - unittest_macos_cpu: -# cu_version: cpu -# name: unittest_macos_cpu_py3.9 -# python_version: '3.9' - nightly: - jobs: -# - circleci_consistency -# - python_lint -# - python_type_check -# - clang_format - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cpu_upload - requires: - - nightly_binary_linux_wheel_py3.6_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu101 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu101_upload - requires: - - nightly_binary_linux_wheel_py3.6_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu102 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - 
context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu102_upload - requires: - - nightly_binary_linux_wheel_py3.6_cu102 - subfolder: cu102/ - - binary_linux_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu111 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu111_upload - requires: - - nightly_binary_linux_wheel_py3.6_cu111 - subfolder: cu111/ - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cpu_upload - requires: - - nightly_binary_linux_wheel_py3.7_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu101 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu101_upload - requires: - - nightly_binary_linux_wheel_py3.7_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu102 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 
- - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu102_upload - requires: - - nightly_binary_linux_wheel_py3.7_cu102 - subfolder: cu102/ - - binary_linux_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu111 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu111_upload - requires: - - nightly_binary_linux_wheel_py3.7_cu111 - subfolder: cu111/ - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cpu_upload - requires: - - nightly_binary_linux_wheel_py3.8_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu101 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu101_upload - requires: - - nightly_binary_linux_wheel_py3.8_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu102 - python_version: '3.8' - wheel_docker_image: 
pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu102_upload - requires: - - nightly_binary_linux_wheel_py3.8_cu102 - subfolder: cu102/ - - binary_linux_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu111 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu111_upload - requires: - - nightly_binary_linux_wheel_py3.8_cu111 - subfolder: cu111/ - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py39_cpu_upload - requires: - - nightly_binary_linux_wheel_py3.9_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu101 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu101_upload - requires: - - nightly_binary_linux_wheel_py3.9_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu102 - python_version: 
'3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu102_upload - requires: - - nightly_binary_linux_wheel_py3.9_cu102 - subfolder: cu102/ - - binary_linux_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu111 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu111_upload - requires: - - nightly_binary_linux_wheel_py3.9_cu111 - subfolder: cu111/ - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.6_cpu_upload - requires: - - nightly_binary_macos_wheel_py3.6_cpu - subfolder: '' - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.7_cpu_upload - requires: - - nightly_binary_macos_wheel_py3.7_cpu - subfolder: '' - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.8_cpu - 
python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.8_cpu_upload - requires: - - nightly_binary_macos_wheel_py3.8_cpu - subfolder: '' - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.9_cpu_upload - requires: - - nightly_binary_macos_wheel_py3.9_cpu - subfolder: '' - - binary_win_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cpu - python_version: '3.6' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cpu_upload - requires: - - nightly_binary_win_wheel_py3.6_cpu - subfolder: cpu/ - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu101 - python_version: '3.6' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu101_upload - requires: - - nightly_binary_win_wheel_py3.6_cu101 - subfolder: cu101/ - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu102 - python_version: '3.6' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - 
tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu102_upload - requires: - - nightly_binary_win_wheel_py3.6_cu102 - subfolder: cu102/ - - binary_win_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu111 - python_version: '3.6' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu111_upload - requires: - - nightly_binary_win_wheel_py3.6_cu111 - subfolder: cu111/ - - binary_win_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cpu - python_version: '3.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cpu_upload - requires: - - nightly_binary_win_wheel_py3.7_cpu - subfolder: cpu/ - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu101 - python_version: '3.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu101_upload - requires: - - nightly_binary_win_wheel_py3.7_cu101 - subfolder: cu101/ - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu102 - python_version: '3.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu102_upload - requires: - - nightly_binary_win_wheel_py3.7_cu102 - subfolder: cu102/ - - binary_win_wheel: - 
cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu111 - python_version: '3.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu111_upload - requires: - - nightly_binary_win_wheel_py3.7_cu111 - subfolder: cu111/ - - binary_win_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cpu - python_version: '3.8' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cpu_upload - requires: - - nightly_binary_win_wheel_py3.8_cpu - subfolder: cpu/ - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu101 - python_version: '3.8' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu101_upload - requires: - - nightly_binary_win_wheel_py3.8_cu101 - subfolder: cu101/ - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu102 - python_version: '3.8' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu102_upload - requires: - - nightly_binary_win_wheel_py3.8_cu102 - subfolder: cu102/ - - binary_win_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu111 - python_version: '3.8' - - binary_wheel_upload: 
- context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu111_upload - requires: - - nightly_binary_win_wheel_py3.8_cu111 - subfolder: cu111/ - - binary_win_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cpu - python_version: '3.9' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cpu_upload - requires: - - nightly_binary_win_wheel_py3.9_cpu - subfolder: cpu/ - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu101 - python_version: '3.9' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu101_upload - requires: - - nightly_binary_win_wheel_py3.9_cu101 - subfolder: cu101/ - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu102 - python_version: '3.9' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu102_upload - requires: - - nightly_binary_win_wheel_py3.9_cu102 - subfolder: cu102/ - - binary_win_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu111 - python_version: '3.9' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu111_upload - requires: - - 
nightly_binary_win_wheel_py3.9_cu111 - subfolder: cu111/ - - binary_linux_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cpu_upload - requires: - - nightly_binary_linux_conda_py3.6_cpu - - binary_linux_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu101 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu101_upload - requires: - - nightly_binary_linux_conda_py3.6_cu101 - - binary_linux_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu102 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu102_upload - requires: - - nightly_binary_linux_conda_py3.6_cu102 - - binary_linux_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu111 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu111_upload - requires: - - 
nightly_binary_linux_conda_py3.6_cu111 - - binary_linux_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cpu_upload - requires: - - nightly_binary_linux_conda_py3.7_cpu - - binary_linux_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu101 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu101_upload - requires: - - nightly_binary_linux_conda_py3.7_cu101 - - binary_linux_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu102 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu102_upload - requires: - - nightly_binary_linux_conda_py3.7_cu102 - - binary_linux_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu111 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu111_upload - requires: - - nightly_binary_linux_conda_py3.7_cu111 - - 
binary_linux_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cpu_upload - requires: - - nightly_binary_linux_conda_py3.8_cpu - - binary_linux_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu101 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu101_upload - requires: - - nightly_binary_linux_conda_py3.8_cu101 - - binary_linux_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu102 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu102_upload - requires: - - nightly_binary_linux_conda_py3.8_cu102 - - binary_linux_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu111 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu111_upload - requires: - - nightly_binary_linux_conda_py3.8_cu111 - - binary_linux_conda: - cu_version: cpu - filters: - 
branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cpu_upload - requires: - - nightly_binary_linux_conda_py3.9_cpu - - binary_linux_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu101 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu101_upload - requires: - - nightly_binary_linux_conda_py3.9_cu101 - - binary_linux_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu102 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu102_upload - requires: - - nightly_binary_linux_conda_py3.9_cu102 - - binary_linux_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu111 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu111_upload - requires: - - nightly_binary_linux_conda_py3.9_cu111 - - binary_macos_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: 
/v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.6_cpu_upload - requires: - - nightly_binary_macos_conda_py3.6_cpu - - binary_macos_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.7_cpu_upload - requires: - - nightly_binary_macos_conda_py3.7_cpu - - binary_macos_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.8_cpu_upload - requires: - - nightly_binary_macos_conda_py3.8_cpu -# - binary_macos_conda: -# cu_version: cpu -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_macos_conda_py3.9_cpu -# python_version: '3.9' -# wheel_docker_image: pytorch/manylinux-cuda102 -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_macos_conda_py3.9_cpu_upload -# requires: -# - nightly_binary_macos_conda_py3.9_cpu - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: 
nightly_binary_win_conda_py3.6_cpu - python_version: '3.6' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cpu_upload - requires: - - nightly_binary_win_conda_py3.6_cpu - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu101 - python_version: '3.6' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu101_upload - requires: - - nightly_binary_win_conda_py3.6_cu101 - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu102 - python_version: '3.6' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu102_upload - requires: - - nightly_binary_win_conda_py3.6_cu102 - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu111 - python_version: '3.6' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu111_upload - requires: - - nightly_binary_win_conda_py3.6_cu111 - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cpu - python_version: '3.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cpu_upload - requires: - - 
nightly_binary_win_conda_py3.7_cpu - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu101 - python_version: '3.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu101_upload - requires: - - nightly_binary_win_conda_py3.7_cu101 - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu102 - python_version: '3.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu102_upload - requires: - - nightly_binary_win_conda_py3.7_cu102 - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu111 - python_version: '3.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu111_upload - requires: - - nightly_binary_win_conda_py3.7_cu111 - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cpu - python_version: '3.8' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cpu_upload - requires: - - nightly_binary_win_conda_py3.8_cpu - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu101 - python_version: '3.8' - - binary_conda_upload: - context: 
org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu101_upload - requires: - - nightly_binary_win_conda_py3.8_cu101 - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu102 - python_version: '3.8' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu102_upload - requires: - - nightly_binary_win_conda_py3.8_cu102 - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu111 - python_version: '3.8' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu111_upload - requires: - - nightly_binary_win_conda_py3.8_cu111 - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cpu - python_version: '3.9' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cpu_upload - requires: - - nightly_binary_win_conda_py3.9_cpu - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu101 - python_version: '3.9' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu101_upload - requires: - - nightly_binary_win_conda_py3.9_cu101 - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: 
nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu102 - python_version: '3.9' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu102_upload - requires: - - nightly_binary_win_conda_py3.9_cu102 - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu111 - python_version: '3.9' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu111_upload - requires: - - nightly_binary_win_conda_py3.9_cu111 diff --git a/.circleci/unittest/linux/scripts/environment.yml b/.circleci/unittest/linux/scripts/environment.yml deleted file mode 100644 index ca96279..0000000 --- a/.circleci/unittest/linux/scripts/environment.yml +++ /dev/null @@ -1,15 +0,0 @@ -channels: - - pytorch - - conda-forge - - defaults -dependencies: - - numpy - - pytest - - pytest-cov - - codecov - - pip - - ca-certificates - - pycrypto - - pip: - - future - - scipy diff --git a/.circleci/unittest/linux/scripts/install.sh b/.circleci/unittest/linux/scripts/install.sh deleted file mode 100755 index 6334cb9..0000000 --- a/.circleci/unittest/linux/scripts/install.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -unset PYTORCH_VERSION -# For unittest, nightly PyTorch is used as the following section, -# so no need to set PYTORCH_VERSION. -# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. 
- -set -e - -eval "$(./conda/bin/conda shell.bash hook)" -conda activate ./env - -if [ "${CU_VERSION:-}" == cpu ] ; then - cudatoolkit="cpuonly" -else - if [[ ${#CU_VERSION} -eq 4 ]]; then - CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}" - elif [[ ${#CU_VERSION} -eq 5 ]]; then - CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}" - fi - echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION" - version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")" - cudatoolkit="cudatoolkit=${version}" -fi -printf "Installing PyTorch with %s\n" "${cudatoolkit}" -conda install -y -c pytorch-nightly pytorch "${cudatoolkit}" - -printf "* Installing torchcsprng\n" -python setup.py develop \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/post_process.sh b/.circleci/unittest/linux/scripts/post_process.sh deleted file mode 100755 index b05be6d..0000000 --- a/.circleci/unittest/linux/scripts/post_process.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -set -e - -eval "$(./conda/bin/conda shell.bash hook)" -conda activate ./env - -codecov \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/run_test.sh b/.circleci/unittest/linux/scripts/run_test.sh deleted file mode 100755 index 61f6e3e..0000000 --- a/.circleci/unittest/linux/scripts/run_test.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash - -set -e - -eval "$(./conda/bin/conda shell.bash hook)" -conda activate ./env - -python -m torch.utils.collect_env -pytest --cov=torchcsprng --junitxml=test-results/junit.xml -v --durations 20 test \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/setup_env.sh b/.circleci/unittest/linux/scripts/setup_env.sh deleted file mode 100755 index 054ebf2..0000000 --- a/.circleci/unittest/linux/scripts/setup_env.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash - -# This script is for setting up environment in which unit test is ran. -# To speed up the CI time, the resulting environment is cached. 
-# -# Do not install PyTorch and torchcsprng here, otherwise they also get cached. - -set -e - -this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -root_dir="$(git rev-parse --show-toplevel)" -conda_dir="${root_dir}/conda" -env_dir="${root_dir}/env" - -cd "${root_dir}" - -case "$(uname -s)" in - Darwin*) os=MacOSX;; - *) os=Linux -esac - -# 1. Install conda at ./conda -if [ ! -d "${conda_dir}" ]; then - printf "* Installing conda\n" - wget -O miniconda.sh "http://repo.continuum.io/miniconda/Miniconda3-latest-${os}-x86_64.sh" - bash ./miniconda.sh -b -f -p "${conda_dir}" -fi -eval "$(${conda_dir}/bin/conda shell.bash hook)" - -# 2. Create test environment at ./env -if [ ! -d "${env_dir}" ]; then - printf "* Creating a test environment\n" - conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" -fi -conda activate "${env_dir}" - -# 3. Install Conda dependencies -printf "* Installing dependencies (except PyTorch)\n" -conda env update --file "${this_dir}/environment.yml" --prune diff --git a/.circleci/unittest/windows/scripts/environment.yml b/.circleci/unittest/windows/scripts/environment.yml deleted file mode 100644 index ca96279..0000000 --- a/.circleci/unittest/windows/scripts/environment.yml +++ /dev/null @@ -1,15 +0,0 @@ -channels: - - pytorch - - conda-forge - - defaults -dependencies: - - numpy - - pytest - - pytest-cov - - codecov - - pip - - ca-certificates - - pycrypto - - pip: - - future - - scipy diff --git a/.circleci/unittest/windows/scripts/install.sh b/.circleci/unittest/windows/scripts/install.sh deleted file mode 100644 index deba8f6..0000000 --- a/.circleci/unittest/windows/scripts/install.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -unset PYTORCH_VERSION -# For unittest, nightly PyTorch is used as the following section, -# so no need to set PYTORCH_VERSION. -# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. 
- -set -e - -this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" -conda activate ./env - -if [ "${CU_VERSION:-}" == cpu ] ; then - cudatoolkit="cpuonly" -else - if [[ ${#CU_VERSION} -eq 4 ]]; then - CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}" - elif [[ ${#CU_VERSION} -eq 5 ]]; then - CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}" - fi - echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION" - version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")" - cudatoolkit="cudatoolkit=${version}" -fi -printf "Installing PyTorch with %s\n" "${cudatoolkit}" -conda install -y -c pytorch-nightly pytorch "${cudatoolkit}" - -printf "* Installing torchcsprng\n" -"$this_dir/vc_env_helper.bat" python setup.py develop \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/install_conda.bat b/.circleci/unittest/windows/scripts/install_conda.bat deleted file mode 100644 index 6612fba..0000000 --- a/.circleci/unittest/windows/scripts/install_conda.bat +++ /dev/null @@ -1 +0,0 @@ -start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/post_process.sh b/.circleci/unittest/windows/scripts/post_process.sh deleted file mode 100644 index 2a1ac63..0000000 --- a/.circleci/unittest/windows/scripts/post_process.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -set -e - -eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" -conda activate ./env - -#codecov diff --git a/.circleci/unittest/windows/scripts/run_test.sh b/.circleci/unittest/windows/scripts/run_test.sh deleted file mode 100644 index 02c6327..0000000 --- a/.circleci/unittest/windows/scripts/run_test.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash - -set -e - -eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" -conda activate ./env - -python -m 
torch.utils.collect_env -pytest --cov=torchcsprng --junitxml=test-results/junit.xml -v --durations 20 test \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/setup_env.sh b/.circleci/unittest/windows/scripts/setup_env.sh deleted file mode 100644 index 6a73927..0000000 --- a/.circleci/unittest/windows/scripts/setup_env.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash - -# This script is for setting up environment in which unit test is ran. -# To speed up the CI time, the resulting environment is cached. -# -# Do not install PyTorch and torchcsprng here, otherwise they also get cached. - -set -e - -this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -root_dir="$(git rev-parse --show-toplevel)" -conda_dir="${root_dir}/conda" -env_dir="${root_dir}/env" - -cd "${root_dir}" - -# 1. Install conda at ./conda -if [ ! -d "${conda_dir}" ]; then - printf "* Installing conda\n" - export tmp_conda="$(echo $conda_dir | tr '/' '\\')" - export miniconda_exe="$(echo $root_dir | tr '/' '\\')\\miniconda.exe" - curl --output miniconda.exe https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -O - "$this_dir/install_conda.bat" - unset tmp_conda - unset miniconda_exe -fi - -eval "$(${conda_dir}/Scripts/conda.exe 'shell.bash' 'hook')" - -# 2. Create test environment at ./env -if [ ! -d "${env_dir}" ]; then - printf "* Creating a test environment\n" - conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" -fi -conda activate "${env_dir}" - -# 3. 
Install Conda dependencies -printf "* Installing dependencies (except PyTorch)\n" -conda env update --file "${this_dir}/environment.yml" --prune \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/vc_env_helper.bat b/.circleci/unittest/windows/scripts/vc_env_helper.bat deleted file mode 100644 index 9410135..0000000 --- a/.circleci/unittest/windows/scripts/vc_env_helper.bat +++ /dev/null @@ -1,39 +0,0 @@ -@echo on - -set VC_VERSION_LOWER=16 -set VC_VERSION_UPPER=17 - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere -if "%VSDEVCMD_ARGS%" == "" ( - call "%VS15VCVARSALL%" x64 || exit /b 1 -) else ( - call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 -) - -@echo on - -set DISTUTILS_USE_SDK=1 - -set args=%1 -shift -:start -if [%1] == [] goto done -set args=%args% %1 -shift -goto start - -:done -if "%args%" == "" ( - echo Usage: vc_env_helper.bat [command] [args] - echo e.g. 
vc_env_helper.bat cl /c test.cpp -) - -%args% || exit /b 1 diff --git a/.github/csprng_architecture.png b/.github/csprng_architecture.png deleted file mode 100644 index 3697c382ab74cafb70de5e961b7a56e3279588f9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 92773 zcmd432{hH+7e8!@E8Tgd_f(~<71u5%)IfKS?HS~bliEH^!x>F z`^S9zq>tg_hEJsxAIT90yL9qobG9jWv1{1l1>Vl_voL(07U>t`8-tCD%iaOcktFbK ze5zqhFT?3gi0l9920s352&CxsRbt}dVJa$STX>V^EFbSWZln9~OTVTJASCFZRo+sYo<&u=ZF?f~ZlJ#Xz*$ z=DLaxR8+98fzJ>u>>x`lJn#t{{9FbWxW7JQ&tSeN zeMeSN5&VD0%+cK3*6E?$<7VQ%C*V~BmYTYcbybu_&FpNrP43&7nscLU>@iKS5GYab z(Z>9-2?NT;+SW-FCC>QQH$=f_%%6D}8UFgpV=Hk+T@`f(SvyB_hU?sd+`Nnu=NT9n z5RUgBh~7rZ|J@w?CeHZq@nd^Y9v)X$S8i7UZaYT{9zGEf5guNC9)5l<@C_~}H`~W1 zC@xzkrvJU<*L#rWPG*jl_Kz*?Y#A``H8Hhwek{(&i0SB`fBze&Im+^PPqt2fPYXPPPG4=h|PTrFJ2wh&dly#@vb z$J%PH0|Rr<(}{J+_uZ&Hwq{amW+n}yyi zQmu}ee@Q2?K9nPWa6>fzm*k|@pj5YY4~7P6<87-qnnx{*xMF_KKam&jjcRK0UhgkS z&#U3cp1-x+k56MAeR|+TX;b=J&{8~Pi9$B_c!b)86#rh7ZftBPPG_w4jI)FIEkz!v zy6zL-fIEMFdIdaP#EO?=`8C$x;{vZRWOz}yjYE|6Yh=IQig}wTc=`>SGfe5<+VS_7 zs->h%7S2(q{u<`LJ9rG9zQxt0i2GN6zxqM>`ZV+qaX$aEmdyM3G~6aADQfTyB9<{E$0UtdZHJ7-OO!__}OB>_+Wzb|lNh5vhs6$czO$>YbByQA*C z$Gc1DSxFYQbz$iqo2`$R-j%;esGeq7xKx0WOLk~(eJdt$xLmy8OF;LT zbMCoQXUTj<*qfvW$!}ZS$7V6hO6841DtTmIdAM5TBx`uGnZVZhGWqxP>zi14MdJJk z`?C@ZT<#7%NU`Z@H{5x@eK1fbQL~l0){zdmCARv(ta2t^FL`z2j+%HJiNxOcc3F81 zSD2zzUXlEhMw-!R3Vr*J+iAPiY^iRuNu^7MPZl!oR?F1q9O}7zH}74{El4AgJX(Kz zxKd7XTM%n7*8##`v9J%;T6VJcC$--_s0uSsPW9NKteLLMsKSew^y!u<6yIFu98sLb~_1V8V3E=U7+kBV^Tu_ia(6Aw|Jw0B_ z($X=we!QI}d8Op2Q*2F{_;u~k3{O^yU9DU9CPzUv|4QXjzUA(SE4?7QvCsFm^}YSx z^pn`yy-z%Ew6yQC9M9gUqgJPt&W+PJz2f>KrW44lq?I-0^)ppbOR0_9HxEAXc*WEn zj;xXB8hU|6vAA7Qu1e)MmFB*h1g#p^vd}T|bRM!u_v|?wJ3ZO%@;)if58et?93LJ# zak*!i8P`+beY|ASKmN!}dOX=GU(wF3U9ZQX<;o3;6q*E^Cbze$c+?5_KR?PBRIOHW zGOyH~p6ow2u;HKE7;zcqvMKp0AF*X0>$d$>!OXFnZO*#0G{MNr?V!U#;#pLU`)Wm& zMt(K^x6}cebLr{zMAxme-)nTL{oY%?JA~| zLG04%j#um!=3hmrI(6_V7@ovLc`zF|_i5qoU`rEj5FGxPlSrTv9(5itaI?2R*`KqB 
zVu?=$J80ihu-Dn;Xrsl#gvcSRj{amlXhCo6Kzqnz>kG}Nq}sLG!^>l4IBVC+^iIBZ zZgq&nZI{~xs-n-@pX=3gGI3ra{b-u4ccAX%r#{Q@A?Dcd+N~O!v5y?K zt>P*_w?WdFOUQHWojcw(!#rL4*SwIid3$fOSKqmp=p=ET(+}LWNNsl5c&p}lEjDlejfWS@%I>K9k4Oa>%;)i{s4!vC)l4#A&xIsR~$i zm$ccpTsKy!a4xCEHX3ecP-YI)tU7FdqTM>`&6i@sM&O4xEk)W?y3GmCX+kxy& zy%tF7w0jBi5^RoCS~eS>D7)Yn(eoQYd^hj9&BW!(ZMiOHd7_aWyp6=A=8F$bmMtW$ z&^ypod0~Yla`X=D^38`GR)$;2kH>upSgRBAKTL*hzqwKKHeYEDW2q9?@lJp)zKQZa znu<7{mX_P+^*x71G7WW!F6Divi6x*wF*tl-w^^;2>r5-1sZN}&?r>Q*ocR&2vyR!I=dB8SlrHC_1Y{_gf)heQ0)y%f<8YCpK`*) zqd0t9GD>-}7229Uvi#&(JUKGLOH@%2ocu(e1E= zek~&>rp5pDRIm-o#&celK8M2cAXQyFoNX3`ip#j|oC!1s9%e&qz5Zwssrv-z9C<|{ zgz&lRsGAdt!9=bk$(*=^Lf6gz74G%6*l3Ivp$beoNJJ~6pvmx)<%F(HL1oiIsm8^rcF>L1-Hk_~E$st_- z-jZTlc{L^|aO}#c8*%&w|7#C;?{1=MpPlHee*OmmyHe9kuHIf3ihVt20bRq?fJpUNiW67|v4adliQsQxjGZ!LZliN--+7PH;bn!LJZRqiAycg5ayc zw}P38_m-NjlIb!*_~vurLztD*4bqfrJ-QU6yrl~pBvKMT+LYlF8bsgJEgG-MaU(hCdcx*g$u;ttY;cXu61>Ok8L}-C|L} z^+p8$ZqyEMH+SQC*!MmETaa6jfI=czN#2?n{h{uvYg2UL%OT2{MG4uxJlilKa>kIt z^#wKWCq9Ku63ZpMs%}1p@5oJjrUItdOtMm4?w!=ERBv@I1k%`M&or&Uq{~WF%l!vC z92C6XQ+3}Ng9p(*C76W}642U9?KE+ellLz3xP5^>;5}d_Hi`3kMT|p+U+f7^#d<#2 zjEE4l;g&Cf1Fr4Fb!zjeXS{=(gTedG>9mdMdrfSQwhq-#{OOO3DI$h1Dd!B2+a%Gh zQ5vF543Vd3_>YFl)m;_p19}ZSjwn}zawE9_&(T<2m)A~lNHkxhO=P-uAgVcx5+au` zfOHzVGRca=HO3ctp-kgCQtH4n+X=n|*A5Jjq94mmXYPEE4(ozhh?U-cOyBAJx_9NS z;cwscg#gfk)Q!cE9`g(v1XCct?IJ8*Y06Rrmm7bd`2PCsLC1EkE#ttf5k6fu&ip%T zzK&w(f`AFnZz1sU=#uMsNs0ba@(>z0IaLF`9};EWbkL`k)*rO7^U{U*YYw?tH;un4 zen0UxHJY5R;n|_+R0txZi4TveK?;hquEg*ys`?&EkK!=#_9AZI% zSjvOZRcr8OS)+shSBXWE3Ez|R8X?FzW?KcwG4P?D2S^V2TTtO@;1N9S+G=J;N{`m> z=DXH#xPMo_Awx3iLSuGHpSXi8C#e;VfxJA$Km|A4YBdTFt0}PAg4*p(T_~z zIQ*!$JfHRXXKzmJB0t)uDG?ecJUb{q2%OKvol*C-qqwOAr)zLiG;L!H_{1&)rwiS1 zm%f<~qCd$w)9|e(^}s!f3^G5cp=Ncq&+#XskS!9Mx9wKw>?SwzjmV%2+hPs8E!P;w zfjQfMDU?oqCwK}ws_A83Wu0ssPr~Umo1~@3r(BgYmSdg^=uAx?y-n#GWk2!fS#?FQ zG?y(wI#{4DHEt-R$P7zKxT9+9com!Oa*^$R>v*n57}*$gwm@NX3VZlLj{MVQs>DzH z_&w!gUhWif4=DrLKVcOl!H34*HTfh-Gi0a~cZX-=YA}Yczp$mDm9aN5j$o655bIeN z$K3qA>63<0TYTpf$wwG 
zz4ogM@+C{WE+df*>ZqH$S|KA{H)_l+SPAj~99x!}3G*CHGK(@uVi?o`DjN(}zDE2J zHzr|Byy4Es#ufAZ*UHbCpa!t)hMi%%I$GP$u0tbl3!6kPOt1`{XF~J-nN@2@HYyX7C+73(`{Q2A2Wu@l3|){57a z89kE5=uEli$ndgytRkJEagZzF`?O4ukHIFiOAaI%)&fkiBvv}HcG{#{CI*JGi7n1P zPJ(tv;Zt!_(k6tWzvDQdF#2`_u& zI$embTCp<~Rbbpp3ce^0J``K5cQDh@gs%_#KpQl|l%5I^l-(p6e2sV~H&ECg3(fcN z0v}5ooSTomsZBjrS5-1TgKVoE`+7xkVT>)o8Xp=OF2w&(9%by)&QQwT!8)2pX{u~k z(JG1HR&voI8MY}mprE-cjxcJ=Ssc>g=E;KT%TU3a3XfEZ)X;bPsYkC-wHREz#z#t8 z53PbC7Q@X-O5Tqt$uo_ROkT>sZJ{4!33e$-^2(^o34~sx@Pv*nNr13=8$p9zcrh-q z)gclq^kd?YvMxGv;x0N%m1?FH7p<-nYFku&5K^d`e&unuG%k{*&7lKLX_h-Mq_Z?F z#zjni!w-8h_n{v%SZ;(eM+e=Y~^s}EjHYz?AyA>bkD26tp? z<&)=ZzoSTA3A}CH%k-;EL)T0+OeTEE#>9^AV>KEFNj4~StOXsNU!zqiX_H7AB%2U!f8SYKs;|sH{^xd_Zj42$9QSRhbPD zK_pYtDVnFfdQ@?9I*d|zM56lVJG*!JPsK}3Sx3cVaIej7gsl6r5h{A+2ENd^UPe!W z1E(`FY5cBg0Ack%T7O|CRzmvKM-o}-$Z%!Yc83}+P5iw52SOw5BMT0s<>B;xh`C39 zX@S0>Y9l|fbjhRO$P#$ut$3#HfNUw6hTZ~w$|9_(L{>_*s}$X8&MhSVmWxcU327Ew z_B`VZ`MxR6-+_3S5Mjqp`7(EB4p5t$(I?tIa7Y{9z1`yLq)F9#v5> zhwz3DXc(9-WxqONM)Pb+eWjl;|HPJN4<{}yrZ=xa?FYa)bpl-DKXQaSnf0~V1ft|` zvYA*xEKn5kx>sy2ynK^vlzWe7Y-79Wv#(Ad^K4SrDHFDYVHSdcAwj<{M~1zM5)9-@=h^FRz@1;`74Bp-UmMneRK*aN$AiB>+vrZtG zQ`SURl^uV0=Bm(yUGS|HRuTm)2E2fE_>l6R(n8T4+jOD8q5>@w!kE{|u@t0X4=Q3Z z^7n`}vcDS#W0QNs)e8-;?rD}^`Pt0oX;doOmD_XYS{tyMTG|>sI9FO58xI+0T@R?~ z#NKVXPki?|H|I$6Wk@GJ#Ih;2P}GalOfR!gGgJe;^NA)mBW;mw)WLk@>&_z)!*;`= zo8`UB5mB^GC_jeF2`xqyD=u$fYBIVR^vQ0na=`uv-$aeVdT3t0KRVfCB90ovGZaQU z#hL1V(>O+=na6a|CfW&6+hTC@>CoI+9gcHxFL$brYG8C>h3LX&8VWacsZJJR?A-4~ z8mk0u?S-4P+T0e&V4 zJjIuMy&2iamysr92|;~?l|>zA^9vK0EH_=bgF){0Nr<#TF22K7)VN;)o|@+dF@s=4 z)->~HF7o>cKMSuk+%}EwtbeDfJGtP>f&C({IMotAmG}eXl38KsXC&D;|F_p#;>)Ba z*NBg9O6kuUTpgGS@uKq6_<3oPxVE8~sSVCh!oeB9CQV&Q^>aN$N6lGElO!IpW@+BH z&f8Yj{_yqiTN*U~V1NnAkO2x+YRw%`gs6F!kdHQbHPx1$ztPl4VkV}QYh_fb?7<^< z%Msdl0g?MsGgCJR%P;6AM=nX^@Jd#UZC0hh&}RpQL~UxuPix6zVs=xFP6766*6eCg zxA#77LG7Mv%R6rII%dR}Eo6jox24H3-*4GdVX$_h4OqXpq$#a^9gW~s{3BMz#`R5>rpX+G$ykHx?agFgU=cxEJRgzVHMRFNOS|C#o 
zdH`=i79vMXyuN;wpwL{7See>>Q&V(vE4E$1Dj8jZp5n7_GV?(QnDftT56e*ruYQ<% z?;|flMm9j*K;|!lANfsfbEpy;Q|UaML^;a%&1jA<&{eIkEn1{OGNZ1|kvM~jD)PRM z;N{_uVw^}8GVNz$Z!Xd?dSv}bc+k7C!;93|9Ep|D%_E^|&De%5G)A*Im_5Z_%_ib0AxI`ysGIJeO@`9uGFYA~F6DeRiwT&gCQ*z>h%DAOD)kT+0$umk#lNk4i_E?2@vTVceb zlU#owzh-Bs@>7J{jUV>1W|eUXISBt z)mp8)AszvOyR9&LQIwd>?aGrX`PIb2%I?COZM5cloMR}>+IJjn*O=1`BZ_H(u z*)pZeFr0opacNtti>{W5j^Xio6~`{u;=?&7!LmCf4KQ3KjSwL@(}!Y5HE{GGXp=iHx6$p)%DlrQQfGUMZ@DGYRro`GGSH&2l~5mf>zQI~lOSatL!HAZ zrp6t)^R0){+)VgPXnHMK$Fngr|b7GEQ9lsWuK{cm$N zvEVDBu^5>*Dy^!;P}hb1n?7gy4@f&p8IF*c$`NOvW`q$7l=#)m1gy$gk5HcDHNHh( zin}DLH1O0|MrY;3iWX@;Ts}9PNENYy&LVX)XwdU}V~^k~440PJQgUFF`3{xduzra! zm8P3Cx7%T(7?%e|%x->tkeaH0&Ap@DkcBl@% z`4%WlCi}C1=+VH+O01zhssjzjvJYp(3*ciX4TDOfFCN^Gd8Y&q_6jLvT9j1|M-HNd zHfS3B+3*NAgJTB8y#}^z(D@p4JXsF{2Z$=%cz0e|bJP{_Bl9n}4d!)L?ypBVvB>D7 z%Qh)Wq0F!YS)VMD&Q~7;)*SL5TxJp!>5p(ydLDFAhg>?Py;HP?x)tXZJKh>b#9IcN zytQ;$%qY}eKP??LOE|`IKY9$ylx_~E^A2qGv)yI8VcWZ{;bu{030shs1^`&Z$#M?p zJADb{4R_6F{p}idd*TE(IHxZp_I*|nys=)rAB>pk}4xl_x5XSIXpts_qzTigizCj>ysU-c*!bwR8S=C5ROXN8h|S%3ys1eX+rgN|4U+Gh76byKIE6k9iX9 z>@J2T1L?9P>MM3SQz#}p$i%rR;qzVeGO-O)Ofp~R&1U(aZ@OSs&{*RpJt7!TekBsg zy^o~tbz^Su`gs!{5%!7gPd_71yL!}jHwl$E1?3@>xxLRd8RthuW+<2!Wub`b?CTro zthTiysC8I|?>YQL%DjY%29o>9cKl%3_qSSQP`)+;TUK|RDXkV%TndnLNfA*;84Tg zFPH8&N%`1Qg!rtEc*aYR;~^6k2kb~196{H(7uE?2F1Pv8Te_)X%Ct8@wkN* zDmxDu4+e*ol(D1P>hsQg4D!(sj%PRm^|Noinemzr;UN1LLOf1A7;mci?auDq9kq&6 zrthk$1SE!!A#QxpnO}l0c?3}t_Lx!c7dJo`A-?CseEIh8KvQIHA`uW0ltO)J5~YE` zVa7?N8WoHzGz;e3tJ;Kx$JX=?B7W_V@C5BWQ7Y|*e0p1x5q&}9!{#Ev*toi=_na9Ts1~Rn zbqky>Rk>xOy-ZfqbPQqvDWv2^J1RC%LAyO7CX zC(W!W&5Rr4Z0S}AZ!r5UnLW`d(FPIPg7Jb=ju6n>z;m zvcfQb*CAQM%bVZAo5!UaP za*VWpD>iH5uXAY<&+rB;hsq{QO{N}-Z7Eq-1Gy45Gjg!p?=r{}$${{ICT;9lD(H_N z)zcdaY|J(~j?K$9UGoyNr&7|L5`Jm7ur+bgKTk90|sMG4SuFQNAzj@n3oBT3E zFZ#mq!D&%pP04TClO234J_B$%*DuIHKAUiwU&%6KT}*rx@Fk9!R}g!18~Oe*0x5$G zz3$e2MQsR{N)=rukR9cGyv&V_*o0U#6cZPG=($-PsY9>*geT7YdAw8>9^@HI*X0TK znEI+-QBEG;5I}mcWZcd;{2v$MS*njnJnJEB{3Pl!b~Obf@_dn>J>2o-I9!IEDub-7 
zS5@rlwTOE>B|3a|oiayENrIk%kl@{Oc884Nq)Z+hn=++3xtYwo(O4qc>$6PvBRmYL z+`b9^)(C9EZ2>@c@LClg9__`0_`~UmLb3=iTyy$}Y;3*TsQ0;KgGH*xXj2*%UkboU zl1IdD);E<$Z4Gm!FA2-ybdis9xiN2%OwHg;v;M8GcN3{XC4frDwjV^uh0``BLC@6n(>q z4Evih;wfX?EMb}TCHA+6|BKWC;JV2_<2C=j=(Y6%Ca)YB$5 zi5QnL&)t$Aj_nr)xpBv@_vV2}6-V3Ie#$C1ie!Cu*^z}gX>~+gNEgcp5!@(B((o-U% zJ(i4*um0s`xIfI(X6A3s&0mBiWo z;iPtOG$)x(kPk>BQ!fH}3qJ`B8v6^ja~2cdGfe$Be(?}64el5SvW|(_tR)IQx+t_c z{Y*{hkz8IkK3SrK=YbD(9pE_hU^2wmTWvfLPWUxGNrYFRJMGjtT?->XU@6XSdjH^f z2T(3N)b;ia;8>^g3X)2c`u5UK`f?4gzc`r4lNHz6pwvjL1G|%}oZNwZdAIJ=v+`)O zeafo6OE`hE?r77^NEIXnG29Ryo63bZTmw!of2VDH@SengS@Gwc zxFk1uyf(rJN|z?XcsjR|-L#4CnLM}H)^l&69(euzh}3(pQQB0cVheLpwHHJz+^F4% zxZ7dbu{YL~U$vSn9yQN|Fph>=&)uloR}r0#mVIwQX1e*Vd^Gt% z+WsleE7ZN3C4;rLCC$?!@4O(c_n#TRZ(VI0-w!Bk66!Z}UlClZ-fCu)JcE@pcT-|a zMMjGCGzwaO%injA7LY1FJHdzivk01B6w~L84Gohjv&qo#)NJ=~c76+JoG?J&P)YqF z(1j2Y%&X)?4hB%%%XV?QdjgmutuNoKpkq9bmyEh(c$RwKVEh-IpPLDFh*Z>Mh{$7- zuAJkYVV;R&^aYF5^y6iVh!u~GFe7Czl85 zsk~-5jFJpDLUo9Bk2(apnDIL>I5W=O@ay`R#>?ML|eK*f4ZlzlYzoeqT1t5 zp|m$SJFp}iCx1NdRaxNi-oN$aXRcA|!vT@G=g%G_TfW)ubgdQO0(&t6!8ct}adNQK zYjk=r@E9PRSs0qv_d~N0=K&R`FV}S{lH^HfhQbISaJhBaZwv(&_C0f!kmr_p(wGZn zjbB8rYlransqKW+nn+@thH4&;cO|zO1>XwF)U10=kWct{?T@T^B{aSq94N1z;fg5a zK$tzm4f$co;i+0BR*Iq1RHW(IRYSqDm ztAJxB^1@?;5>G>z>8Uy;jzS+$?Mit%rX3=lR9os}^(Xt$hYP^8Sl<(GA>RR#wQeLFWHX&zG)f}cLU@ovym(iq3YJNUIvE~9{C&~pFeU0~t2#Jfp*KMTB5 zBPCDv5W9tqBsAY_DXP~z*V-G%Dh6DJEVBdwPV0aaF4& zZ?3l2r+hpPlGuK^9arFysiC0Ez#DB%EBhRhYsFJxI@kp)$?;oN-66NR)MAED4B}{( z>T9+8U+mtf2z_Vv1oVrjMW<|1Mm$4KG>O*~&DcXu4mN0|s_4RN>@-d^HoPY$dO!PB zWZN+3z{gx%wWxLV5c90(j!FlBEkk2Oliv43IO1HFS~PRgbZ_6uOX3N&V47fKs)iN? ziOm><=MRUL8HqNdQ15j;`;X^}*EQccrP!1y2GKovs8PAkJaL;^TnT5!w4tdE)$)SsImu*}|hL&afeqTaw=Vm@Sq1bF>x?`RR}(43{j7$isA6PA0p7=)B2Q{48dU{M@Mh zK|qcAh@B?pyz=E-&^`~6Wrti9g&NnAo!1CfT0^yn^#JfTRRldp;P*L8;^*6;>5gC| zN*kX1k-H%*F=$Woa+)ms+C-+T7hRd0J2YxBygYH@E6$DTn$PY24+9B^1C1#{;&PX! 
z6lF$&S zu@u_ZIc(o}Ve1G(o>>4RxuTA*_D(o0g9ra2)dZ!??0W2^u8ij8Ht2dGpxGIf%C+yq zCrK<)E8Aca8>Yks++Mw?ccuIX9Ej>j;mKfmi8qU)C_VV9>4w<5_>HQ6X?~m!-L0A- zfif$7LMTq)1U0PHtsNg0h`#9ydvmc2oF~%*Pno2~;ew|}TXS;N4n%f|pB*mF>@!>P znG!X-5b_ZXKYSJ8s7vmT@rSO5T@J>FFb)E9)y^p@=TO@K-)1cf}Nu6fNSMUC$u+lbS+p+*Vr)0B@qrB#P z-38qVIXP3$ohrd<&jy+Mmper@6g+IpM_fdzxoZL3`8Ke7xMNSH$qxIgGy4Q*b)oQ% zZh}2R<*cuIEF~3{={3(XEIf;63+SHJUmmT#eh4wern^@CGp8enY!ttG;Vtctx_LRC zwo$<%uYJWH6XLa$+nT0y{$HTN4NiEZ;egN@^~wL~wP$M(S$1|O(d$-r+z3B!h$Vy} zSk#()Bk)+tUWWGVy(G1)&f)WpSgj?0!V5JV^^YcvGAg?_OdwR)P>s-EW))6x)X`nip zvYAMWYAZk97u`;_wA&N!nofe)wc;&>B7A&ZY24M&Irws4-N=N29#b9Pc85_Op(gS3|zU-m;bQM#-kqmDYU&|QL;%>D3hfk(nxVF*D8 z6LWF%EkE_5^}LU-@HB$VtIKGIpDev2Kq0=BmRF{#)=ho79r`FwtzjOeBDMnO+gm9g zON08|Jn)Wy1Q*01=e|Y64cIj38Hxx zXCr*H^nkZc(7_nvj5^>f{dOBSe&d`&NaeZ&b=$g21LSSkk?M>nGYi}HdAn48JMGxJ zfB|jhsGB|wlT8e<7jC+$tB_W7`v*-weq{++1{2&2;w^c+pteOnYRnuw=h{>xwdu}T z{WY*ywX_1F4e@|vK|Z3{8Fw6>Blk?5Q}?(W!^gF5#KyIVh>Yg64h7vC`&T^!6%PoW zrsPIfb_B5-N(vndb%YY3g3C5`?s{e}!B;xiYvL}SOG}qxU=PR~Br(UZg^zUN3sj#q zhUhd*T@A=?FitLHXvjn{>3*Z!zh`<}xzqf9!b>|KK08PVhJ!TSfPOLkuAc5(P{mUd zV|onrV!Z3r5v7KX$&veDB2W~)$(Km#kdfzKYxTg{&UmTGiXEYuyh1vhIRZ!^&RSNb zYJKmW$1u*#4P+N2NcMilDb!ZUF0b-ui{off3th^vEXg%aM_qX21Q(^94ad34iM%_j zSXs!f-p^ex5OiJlYDYEnU1>BwRt{<*>t&7@I1`UBck^Jp*~~2K zxkNIE+Qn+yoWKMau16d41G)LSF`@frE1TK-84Hnd<|tK;4ZqL^*h-_Y>fHUxtR?o1E%Vy_9!0f5kP!U#$;0yxLd_X$79Z1|ymIK#`43IAK33`rF;q z8=9!XlEsu3dT)e|&XL)N8bvyG|cM=@(*_QIQ- za=O%8?yc*bOy6SfFG9k%*+%tgqjU_MwO+l=ym{lf^W{r@%F)X7CWNx{x#w?Lq&jPx zz2hgQb2cS1 zBu{-p8zYQBvTV|N9`vTuF?>eeq#MpB_f8y(l;gLquKVnC=aT0t;Hpdaan|lly3rXL zI4>1KBXteNX3o9Zj#3vL)QXC7bV9qEXu)1_3Ouz$ctPWZ4L@JKC}TdtC-QV9{Jue? 
zBAOP>_%wt`JD#F-1Ll#Xob;6Pg2fd?`sWz*9DT1AN7P6q^DuhB1YO_f)}^&H<$iUN z@t)Ngkb`WV5{GqdH6iVNs+kf)^_A7y!&T|wnsGm(&FV*V^rQBd{0Qvqb1&Pigxcrn z24*Rthc_N=_E`Dyd(8cut?qw*#Fwc+G-l>r&Bcj)u?`8f5wzlIZ@d$Dq%dQgWS+UJ zP-?(gAkeVUaMQ)$k_V&Wb#q3e+p%lY#T~iZz_E7i&H=3IofmGIu6IX7x4&3srN$40 z+2=^dh4+LH(xY!2WIURF6Uy7N$%XE7WZLg6x+ueDdcK8x)K1pzYsCGO)cjkWY@>TY zgO}_)!$hIRjbYi!WRlOF(?G$&(X7nqBi)1-DTRH77?v*U7TXL{`hdEfABRqOwQUCt z*GA!wwBCP-15=A-8&i@-D93<+uQOh~u)(r<=^e<0%w!-UAn1t|hqkL()0QU_A@4zH z$c@?r^6Ea~Mp-{}JzuoBy*uzTGuJnhM4cZsHnR6etpuQ&tX7SVCMjDcIVnmX*-a}} zf|JV0khzHt$&Abds8*SVS{1o@B(tMrz{eF(hR{Z*%~x=jN{`d-rwGUkWL>!NgO~C< zNOIEPx;>~j9V)T(*KTQK8GX|tn)x{(8_E5gS2!~BYxzXxCSV^T+-I703ILIJyH(AL z-&<~P;)~lW6Xt-Z!gArX$-{@%cEZkq<3aOsBn5l#5C`+GLL`VBN|)Ow^2l2#=p2r0 znFuKAE+D6%g%E{zf$;c>ekDGq?Bfd>@vryHy|Q2SJ5`+Zp_)@bF%xRnXLwB!r#h8L z*8XZu_~(TNSHtU3A2hBz_BQ2y+Bg_rF+)Ko%w-Imo5xe{|DbwlgO-EEqq86(zI=## zE`Yg&FgOXrw(pp-amc*TXE?fZX&;TDjWxZ9k1lepI~;L~rfY{@8RA1m=~{q#n^rm= zOmN@1Ujm#o!FUaw+`w?|0gD5 z6H?iWc~4#n<`(E}!N?*(9gF^zkVc)m=5fnOjS?R(`r>v8ZPJ|^v$d(p3!2IojPy6; zug;O#X0nbNJ8ufdvD~J|d2rR9#Nx__mf;HN2*kHie3U*M6l3(XFH<2 z_^35ZRZv>q$s4nZ^na;8z{PwUkMD)<(1>_U0(e;InD3+ zgH&20@LKGdvMzeKiBV+`<8**1Z9un9pWtiwbIpkm*+WL_5D(!JafJTmOC^cH>UHlU zxI)YI(NcbtmrR-E#lEv4lA!JLQ79{e(hNuX+;-7@)w-{{RFg_54Xvg5{+*nk@nb10 zj&rUuC*0vvi=eQnvlntR*e^I!3#8YAXdQ2gP6jhwmZ57=to{n)VMSmsJ+)4-drTBn z|HwUOthL}`gKqFpAHUZbmP52u0a@aN+rY`kf|_ghss;?*XME*HzC6+BP$X0q#z9Gp zn>Xrx=>$20YHwy&Rz5-8bxb|aXU{hXxDATCghS$o_v!K7Xc@r3~s#X}bBcAgWGA`@^sSfR&lvhQ(|AEN_a$bumtQdJNg+7jfBu66<@y#-v zSBHN*N*4M`iN0ROp<0q-$H?suJH`~g%-mmMZl!B0wiRBow~h%3Xgv}twaBz|~C z)s}~BR7la!Iv6Yd|atUEvL&Gwv?XL-)DSLD46tQ*z>RYL*Edkdi4(>L&d8D z!GsG~iUrIxjzNmNQU&yF%ahON@^(={{st5^{`EjPTH-GTAijIu@ zE~g}q29XC@;plw4B-m<2v;#oKP*VMz$fqK)3b2EhR zBb>U#Z)^+@m4O1jH{~GBQ`3EM_;=>~m*bRJ0y<;08E~zgx-sVAG-T|5p$7=LcfZX* zAJ<_iMuBd@G(!6i^_unepz!fR&=9kcMRh=?V&vG!S=Ihurhl6ljOM4Z&LH%x%kryW zx!VZiujSTRzdLJ@e+_J&8UqXTUJ^q7X`b7ZK&4I=dGt8WEZ#pKp9F%OP}k#t3ulpv zf53-hIZ)nOchKJdr`lNGG&m@n6)-T7{WXx^q9==%@`fc=jLeRLqOlc<(jRy%>rLcp 
z+Z}1*!1T46y|<`r4E!~_U!tyOX5oEZSON-_UV{~JeLCq-o{Ogq3hfs29QM7wn2Wqv8hzlVph=30#ZGD`o5iR@t*kfL59C-XmYJu4D* zuu(KwCSCuuvJAduKso+zcDtKTE(~Ped$iS+4)SABk85l>{s1o-AAxa1&>t!m&se0n zv92uvI7r-b=lhl4d-0cGl<|Qqp1n*MQTWr0c+Ue}mB-G}`h$!ad|RcAXIl%Ot(vWo zebDV{<@t3N0PsS4rlKU%Xg+{~iOj2a-fY{N*_>gC{v&H#$b69?R*+Hi)A|z2-kxu7 z@AUTT*`;DC3o|PKlZx}kTG?6Q|E*k9aIgztIFEl$z(B5u31m~YGpjdA#FnE;M5dz! zr^4wX8(%Hl`!&UX*4}pmZp{8;&p!Q|o;Yx!dQF0^#8WSWIKZ-?W+(X@;)mORWFS>X z28z)K(w2H%=mAKu@WP*@?cFJ++EQXGscU;7ZP5BZZG3>t5ctjE_4{+_>3fZ{iDr={ z0Ns!LUN#c_hb=$CW_&wkRiLz42!g>~LZXHLm1X(gEPPLZ-hHd{97y1j+#C1r#pE=H zRX0~L(2HjUz0ke?S_|J;FpTqH7?-W`V!$M;O2)hnGXd7cW9Qm(#Z?(Y5>G7sc;OGl z@ZI7-DMrjJRb-)wpajq{1Y|J8R8oYv)W(1bSOEFl%cd%nw|>KwzeL;}2Il^xNQYHA zrQgW=_^%S_wFL|U2dYmbmHld5D9-{h|65OTu;F@|7Q1SPQW{CbIP0!N%CCW1s)e^% zX^Xi#!@Bt`nO;uPrRH*e1eVSjj0w3Hg=eu5C&LfC&43|I`% zmYwsbQTlh&{|py|4r97$vxwekm+qGYUjH}uX%z& z{0S%^&jd&-6Ia=wsb#<=?VGvzm5-7gJ2T_IKJvZpgrew9hE+J$xzM{`?Ozm0H7W{gK)vUM>JIwe*OM?RR1@aLD-KLf#WBVQ5}g1Q znk(%fk2!`|1L&;T6M(b#5DRqhW`~~z!2i)!jFG4rU?b0wt3Kvh{^8W6a#Z>rBw!`E z3hQy+g?>=}uNM2#VcZO+D|{L$8mv>JAd%e*s(Cl#t3(0BRj$7(^M?UH2?CnEw*3ih z-3o3{=pQV&QL~_832JKXJfcD{qg~i&p;rUTTVP_3{pk$l{qJoeMGxh5bESWLTk?hj zh$@uBIN#qK4ZxtUB0;w+B&RipKt*CNK-^>#6z>8R=*`jv2 zF&8*2gbQ@u#KfA(py0cQTn!O}QFZy^zbF%62W5d1HJ7i8@J}L6_i3$iuZv?#~6s-$l~w+r`^HyW?T(;$PuojA3XmcaOUnTuyzL}Q`Tkwu@T@PqOt?ZHZ_84 zYxEysgI8}~1@4c;if{_opALe5Qak^ROMvnBR00RV|E1}6AVx3PACjlkgQERsm>j8~ zd0T7=Z~^;Pm%-HoPwCvIV@jIHZoLLEa8X&fz#n8f|J$ZuXbK15QisId1>n5g;&E%e zg}*$uz0li>xxfRl22yUfbuUY`@cc*kzN4(h?D}@QYGdWxNG6wQP=QTTBFSeLDKdK# zl!@<6)yafE#grtsPuias6RBW^*CzdR1K|4Zw{ zcpGqehMN(0^?Ji&z|9ieS;E|Rg1HOi5aAZ zVxa3}lR)Z&C-gSe|4}((exS+8;WZK@hsq=@%NXO2Aw~86wU8gCA`y0ALw2Te- z<%HoJq!4`7g3VzHL6>TFN(-)Nx;i&w_-O6|5RKe7@SkQF)J z`}sBFCy?~C_d56s0jrP22y5fVQ;&sK%jmvOkNuo~<=S~|5toNuKi2>?7@<@8PW->q zgNLLpATLg({woBwsI~KGPH1am8(4}~OUG*Ufs8#BxRXn8X4Ns6>s0s~_S@S0P*aLP zwL(Qw?LMIv9x^$@J23t<;7pM0bCpXz*MOz+1>oXrP`~G5d>;c=4@^S@Z($(U+y^dB 
z0ZMF;Ns_zFs=ONJNbZNe$uO6P9fAPB?tTL_(KQ<1J5+bxZHCVPC{+w}&Xy)@inyDQc+_oGI;auK70sYeDJ009UB9uS8FauRJEStLAD8eDAQ0Jq6 zB!MZ%hS~$S9k=cM3&8t+t6gU`nrws}Ep(%`n(G=Bbd` zE`aFlBMCN8-?x)##TxkNj_Rr|mWdPf96ljz(eO8i>4)ssRA}nzUa7%2uVz>Y^<)9q z4F2;jWgroF0LtR3zPE;k^?zQG--+6?Z`fm|e5dc0tOLYA%-#UZG}hWMEytV>r4RRk zQul3u}VUfa~p=IlJT0(2C%fg%Cy3=meXMzu>_ z4BI?OoFVgg{aHs4kuA*du>4&MlvlQiFt|DauCBf;Qp{zJdk4szteT@xJj1)7IAe69 z(9I%s%mQnK@-o!Sh4Sqt>%W&#Vz{P>rvZ^aQ^b+gxL7~BLZD>>#7t&4@oztP#n-?X z>Sqke=0Stsc`>E$)}S+sWRW(itKq|RQUMSSusj1|{vorw;P>iZ0}?joaD456*@nAE z$!imPA5e5!mtkd#L=x(!bm=1L0s%KvGi*T>JO&6cVxi0YRmwseT|1NxFoTBSz3YDf zOlfrO91n@iT^j#o4CvaT@o<18LaT)C>}$pMC~p8piPqPy!9pu2X%QN|hf_+<241`? z?YZBQ`SxP~z|jV@%gQ~({t*wUWj?6eLV&_Te`Sm#H5cxsho=@C5iEEzhk)gge z?(g7;kATtXhG&s1{q_-9tp-1rV?KmrNj@q9xqKavbVu$lzhJPQ6H#~KZ_fXNZ#^`8 zX=x}w4-&tS+DMn`Gzwsbau@Z7Z&U99l>!|-zwh$$Y9jXB3y@rl@)gblix7^1dYOFF z3X}#VVUm?y0#>xtGh2y(H22^5iete1;_Il$1t9#JLu5IGdb_}s;)Pn)@o?~Jo^ja~ zh`x583j_yHj){h4LC*kbN*S!j288ILZ3ZDJAvyUmlj+a-$B3TR)EUARHN#ANr+DsR z92XWbPN6DY;Jpmi_0n`=D@eNo%1w5ZAJ367fU4SRN?qW}35}EyI z8{m5J5pV%>6gC*P8F;%Goeq>JCH+bZ4j-HGQP;2Hk5qFXRK z@lHKI4WNDQb?Cm~3j$S*R(A>QLUSasV}~HPhlDPa+$5isSIa9rFl>%nHZe1SesA#e zV-{s^gHgO2BL&$vc4`6K6n#)$y|En1Ag-ZOFpL7t6c;I-acU%h`h$;MfmVZ9(dC*& zov8gbB5_^2U=UETh)lG-s8TpF*zH28?As7&}W03%L`O`(kX^~};pp@Z@!x^Z}26|tnu6=~E z))&tAcJ*_gmkDsDqPRKjGJHrj%p+0T^C;NH5jR8%-@L z-`C!ln;=9TYm(L3$Zy(TpYY`Dm;%TbDZLakg@w??wW1wK+(3WTkf?;Fs)E0k_Vd9< z?VNvz^?pcC-7n!3&GKnO{+oq2&+GzH%gnoFAn#Naj4-iL#kODd?qfEHNpeC95e%AJ z0+ypiFQ2IU?c(j#fM6pap+3v{!6AuIQ4Yb6@4rrX94GQX7h3MPc^a1ePc~Tc-m7JuE z4<|;=7^S3|b@xAS6)O|77Y&ibkSN=Hzy2jKwUYz<0(uC>tO+T^GLV4K)RdQ;=f$LE zfIMV9&(pfMLZ8%*UQta}`dpWaDe?Xi_Dv(l*d445L>!9y*f2s5CgODxGz&z##ModE zS+=g#NS(QHgCjy^Yx`OC)8SQkRd4gFj(ma{>vTuqoX?C*lsO6Fc)WEnkEqol#Gf+& zQ=x#t9yB2nOukGx=D4*uP+F>9hU(=Ney+&f);`^Hh_Za=H2D=DnXSXjNTdkw{j6WN z+kSr1IPWOL+6e+s_NOD>bK^7HdZ90W&s|~^lC;3MFcqYcQs2}GD2m=79esw&kPIP) zC>d>!ZFj_7#PJU2V-Ho{5K$f*`CS}gIY)T{+?y5$MVN|!+gShJ{a~x;KUu; 
zGC1m!x>xeKi2LP_XpEF?!(faQrPuh2?e-l=G!QcdaE4l7IUxj_^WI`2iqERhha3Zo zKGMVk2U$t4OX{(M;j0NyldNz59io9Ack}I*ADWGy8?P4C_7omjO`o@T-INm2m4F}N zqc?)J?<1TEM!2P6M@#!NQIIAUJFVgqrZ70}8Klb5MikumDMT(H`n>MT+XxgXEk^U+ zHboJ_AF{R#*{gFQx_YFh>Lf>}-7k{`1SY@IOz4tl4cG>;u!uipXBJ!xQ3 zFIPa2Rw0D9Et#XY`fg>HaN?y0g>gRA-AkIwUC>BQi6jvdxfzhJas3o3InZmwj1^bU zA>m5`o4fr&hdc$L1>2O3(BI=!9bm5u*4}A4hOa4BH|^7OzLrczOzLG7wpBExU3BOP z?apq~&YO_v%{J`mVVd8;K4-s+KaSu+3<6X_(QAev{zMa=-A}0Jm4MSv@DIG^0@;t1 z|0t4mogvzj?g~a1`Tq!@9JFWK?fw`dry>9=CCEY1R%)rI1}Ob^D7QICZ{dIeEKf}n z87xD|q!Q$*+hF)kG6T`HN1`aj)$ef>Gzs=!FF9Y&u|~N)#Z-*C#YR^Hn=9$1o?#J% z_N97Y_5erXw~&AgPzwh(Y1*=r)%CT-+UfCzriJ*3q0+DjMIi7CTK7;hom9&V$MRz) za6C{pkS<@T!9FV*{q|XdSzZ%{QOrsUap;rOGwITP^KGfJxHcBGf=_#^yJy`yk57Yt z`Qat$jY_B(F=`NT5btcm7UP?pkY*V~BGxVGZ-pOSR_Tvzkl9;3VjOjoRX&m8#^!CSfB`cYjmocUEUYvz(2(ax1 zXeGdRPB(3!mbMfpbz*1SMfFnQ_b7w0DC>;3%`6uNJu!pUgP48h)-mz!Gd2Y>cTOWZ zDs&Q*T=+i7mg2SfuK^C2ix`a<LuT#BMlX;fs)kn}$1@OK&ZlB`Y(CI_8u z?USHenh<8X+>&mONq#OK$s`ol)k__2;r|)s-M5v>(OHtPpjt43FkclK{BFks!dsi! z9p}Bf!9u12LBALf!nmFDN<<$l`kKdb7@L=L?4$!cHlT#K=2_KqLrs&giDjFa%gVM< z-n9k-5NnWr)oE^XCuLhzWz$~6B|MvB!L0sY#s2hc8)jW@;AjGh+Vn4vm@a_`uMgjG z!{=&U(}fbam+^;0(X6tVa*!%D-x+*`h&sTq_eGkM@BF&`^hr37>Cn7h(S;D!P>WI6 zz7~t>ud+=fy?9oUbGYvO)YKoYXn${X?S_+%W7gaP@bS>n%leoJ5EZUCltjq#V-yI%)I&6k^IJ!R5DbrsepjRB` zF^abB^_DDro-fbX`iZ}A^JPJxEwKb|m2d3i0|WU8XivEw6rz$oajQBSx-)j7VCpWA z2@!yoSn5Sl(jf$^x}$5Z9wr>l38B(<;@rNAiNiYGK2w&KqyhX5TVra|z5`!#9&m8BdWBdfDTEWex(GPGnd# zD%GZCcvrS4LCk|*ApNM2KL3=18wi#t^l%at zc%ns^B}w4#8n{2w+g8K(SCbMmK+)rumfxe8paCeCc)|Uo5QDB{!SIv$?GhO};ie8` zaq)%j;2_GAt!*?_8>BWD_+>7XbL?P${^=rRaa64d^6A>3PCsK-%s2KG=F|xwaCP@# zz-}gX7me7(;=>4S*po@5r7xuASON-c(y8{*^M>?ned|RssStsngnqt*iI)kCii%SOUB4mKPD*MG7NhQSlU|X87K{3K0{JOak(wm$(T? 
z+N2Tb*WBZ@>A|YADn7ASe3G>pW9zed4kZ0bhr+i*nr=fRrcu~RGp1z7H= zFFW%q%W&^8FFzCaI5C<;dA80S*GF}e^GP?Yb4(HsNfoxUb|^EgGIdA78_13|#m>Md zS)e-PI&XPrH;fk!_YQ(OXKBOoK1oJ_4mA2#2I7b$0XuYTY~x58)7|>ov54OA^F&T8 zjo@Io)wk!)`ytd(<{`Rs9vBH!8;`*2q^&Qw?RL|8^@q`k<{XE~;Bs8IHHA=Iw5=PX zy6WfNwDt4B)1ZFfnZK;cn-e}o#M*6@{OT7Pvhqu3aYPhm?3RJH z^9R(07uCBGD|x{fGi`b*TM#VeU zdI|L}#2V;q9Zqqhx|sLR?*!N*rI*XppmUTYojni##%)?305{=5|fQ#AC(OcGj z0m_BM>Iy62gquF$;2jsiPts>$_#$UU&Sk7*M4yC8nv}pz6J?(Ey}FR5-wu(#DR=Bq zPrU?dK<{yG17>@?nQnv5Re|i`z$LNAc%2_>j}B-~*-jtzw7jPrz;^AW&G>TF8dwzI zhWv>4s;7F1y^R1U%fz0^ke%2>@n;zy@XI@aN_su#x1G5vf#W(g9}N~`8&rc|N(M5- zcH8S={Sh<;DL<3i79m!LH8yirDng8X1cC5a?t>b48yCeVu%<-Lf=MPfJ5W(Dq8Y4iEcG{^$Ex)h$0*`GdxVJB$=^6pp+-k zw&8|h=eN!3*-dQBCwROFZ=fo0+U8{Ho`Yq1Ok*992O<(>_2h?J@a~D@0Akf6*2F*abrDT~^@inCS{<@d_gG3|!}OGM3Te*MABwNIO4!K9-QW`h#d`d|i+ur6 zY8+BEkYLg~!k^Kw_9jn<7wbjW(7;O@8BLA`^0IZ9@f9%2XXJCiMc%h%B+`RuBnl=7 z@_4x~@ZMs>BKi?A38JYWp+}v)0jq5RMp1r$RG8nKv z$#$w~UZJNg(xdvZ3L0NTy!o@yF3v#r7(Cdmrg-C@N(WjX`AIx#37o z`I#p6WmvOAweg{8OcxM2V-Fw>@E?^7=)7fV>%$-kKDg8>eE9GODS$74@Vz5;bjXl9 z{05q)J0N$VPeo&bG>DhwL&Wyr1I1MoTtHABL_oBHv zybFxI1Pv1|X?e4tt0XYa_Zj=wDF-+F_XY)eA)aI>GlMcAnj#syyP3W|in*d4<3PhI zAM+F9ObiHSD}iizy&L7$Lyy|s9OpU6Tuky&E8ePbD%G!zWU6{|--M0G%l#_r88 zk+&2%!g|Fo=-02S{vB7z2*a8Hl29>1D+8YmHFW4{E!M-E=|S7r5+786#s6&UdH^`@ z^NegN-uJ>+zR4)qD4%80=D{~)yV}mQ;8FfPL~tEb)oR%!jLkG)L}FKr(J<`T7W2lB zx-E6sth~Si{X3#k(^*}&ej5$}l13PP5Y(}@wB&+oiOAumOjG#Ss@^eO265FI1mFLM z>b)cNuv@PvsINvgYt@pN#VYX!*~uA--P_*RKGY%F0b(ZA3RAGLgl!y=CHJk+tw}IO;PQALb?jYIIu5!zL@c3uT7OPHH{j6hj7qlXal^ zD4OuhlPd>2I$*(r{yYCxm+kF$->Ht87rsyDC0o)zJMz)xRgfk`e6Y<{8(X%V;l>;_ zpe_HnsK0g%dh^ckWV*z*WzyCSg1nLhP5O{Kaf^^AD-^nyUzOelC*6OXX?94^A%DnX2i?-jXF`spUBzzdsx~+nieDx zd(54-OLA5q6E0Lieb4ek*b|;)4_#PWOQbniw@RSoB=!{oq!Gtfchf&{U2xPKJ(uEu z8P5%pri71V?taIbb*V)@}F}4i_!F(z|*Cz6$HmD=Nmx`3uI+C71kS^ExY1?_kRxod6b@T@1tRg9i z9n_Ec%`b6+Jnx%c(_laa1I~Jf*F2E(x1p(}d5NG?&}S z`(|56oK#Fo+!H|2T1=sRzaX`yYZ#4>H+1d^-~SWA2CDd^QE*j{9jQ9e9M_YSmO<DdkS)rPh~Mt`m0FUtC#g#c 
zZDY?AWF2~$phNo*$t~pL`^dNwzfLGhdSAIj))s=8!gY9y}YuWtn2;jBImA80~esz`gT~Dw;h!KBjeTkb=eb`%9cO z!yUo`xOw18ox^X1(oj03dvqzl37`1Gc|hxB?6*-i|AY|hWB}&>=Dk9RDk3-HTLNPT z!MQyRzz>=pKTNKDNxnfIwj61lG;lz-&^Lx6fj${3z*v=ihk#>X4CG#`(z2$9^G*MI z8=<`b0PPXTeiu3%)D%#KgVe6O10h6z3tT2we$iVpp>-bGNmAH=l~}DhHE7F`*f?Oj znu3uL=sgf`L!qeB@HRXIGLX^oeKx;M@Xmii8yr7@8=&xemE2@)NankdgqB{2d|9HJEzyzmH>dg_vw=akZ$JoN}(pRpaf-a zK6FMm=;J1Vj(4?mL*%CLr1GmzU>g6!s`y_(kt`s%pg`}er@$}}hg$KBH@^kMD0um; ztwl^sk#6q|+-}F0{)No_y94?B0Z5`n9&I2x3I@4cEGK~AfbhB@pa|za!`%NrZ14Vg znjc+v060kWBD~j%2i+5c&w>YS*TwFBKJSRzEO?K)C&#aj2jv8=pMJVaXLN}u^R zkQ)WsqV*x9_<@zo%$;r7th8N42Zk*q4uy0Bo1UBbDDF zz-3+a|7A!%;sLiUF27L5LIjXAPM_&fv77)RZIbiEk{qn%$^Ux?u>QsRcrWBY^*y5F z`Osmu_=u4+4L~|6hk3UDje}mL0RY0(=V`a{6rcf1oFO03w*tt4NKb*g!0s^>?x;Uo z{4Y%GUpEh6y(U|M5#`Zn_fTO*$tE?P4=@6i$KHr47Kz$lRMag~Q(ph)!2bJ-Tk^oO z^}1JTDxUxl18o9ihkrO5IFd`KrvO)c*)p5`-_8UbyB?j<4;6uz6C%ENA?ELm9m;{# zw5Ye`e}J|B=tVz6(Oi*-;!~aGE?5*)j3fZJ%MgR!g^%20@gp@+|7A3YzCaP=%#MTB z#Gob|V+TN-<7P@Ot`6*wMTna~q@MQgf%dO6{Zlzo*d8vpua*j<*r7s-Aoqjr*aA4d_!4)*H<|s- z<8{GNe&ZgD5;$3$Uv<{dG6~-v)%H5?TV4WH;!|zB<(<-TL!_XA5}qr?qdG)zcVp)~ z?SPiy?QcFPENt`^QO|9~C0C`~MJ~$FoKJU{3pDn11 zl{}Oin`G9?v4R^1xZEpXLoUvCpyhq^_ebT~nrUQ=${Lm|CGZ<07Z4$U7-qkuv_cMm zb&i3Ls4J}l?mr&@p`6V#IHei5T~MbhC?%2)(8eua)Sh`!?4WX*D=wJTl3?>jps#)7 z$ZeAfN5cVwNWsErqVz2)t*iChEgQNX>HyKQA|2ct|JI{{L?N=*B)5*_x+f6d9)lCp zk#tl26Vo0+t;W{+3i{K6-2Y_>_CG_zFH|Y(5R7H_GW}oPYTzNz3Hw!`$a}CTJo)zv z{TFqLC@jWaOBKzh|Heh0`2yQWrr}D>_l2{_jssgrDu19Gg&E}=(9qQDNzv5TpQ$h& z(>|Yhcg+7g0BICcN)!ecJ2sy3D+bNL=~JnpkO$dmJJLs!->diO#U~X92M06HEw*cb z;W0rG32L%&v7_Es0eRCd0uJ~HnE&!8EZxuKnTe#lmO#GGCo&fI=5$S@*&4-cq6?Uz zwQnM;@CaV@f8$jCK&aK>)jTpbR-Ol#bSbtv?T+tMs?1aU`;ZK{@L?-epE(odez!yt za`zi+arKpdzGJm6*F)!K!A*R}Y=;UQOZ1#`veC|Ppms6Oi$gTi{MqEb32z?xU&qO+ zBV=&xQMJevH4}gup=tk8Y=)$>vvWaT`DUx6cZ1Mh;y2v?jgWhI0JjqW&W2?q%5Z^e zG=d%;xKuh+t%!WC>=nRD?;rp8=l}onPgnnhXtgJcc0_*UOFwj@1cU{%Da%!KN7 z72_oGq|6k&RP^S%adGd?DwYVi-|btlT-(qs|CUg~b}mGvI5o`sd9FJ=tbIJP9jx=N z 
zD@-O$z8e3p2cxV_pjCqXtW0t7k$wYY%nhSWEl7=u>53nPNrWf{Z?)m>x98%ErGq@k zv>~wJlwDXAjgh2OH?6d&*CbiQOmR=jKI4PDu^EJ1i7YA2FJ?|f@#}(2U3ix~X{K(P zt|J>RX&@3?hN?IW+zDs}9uQK5SDJa(oNX65cHf9Vnzy3DhpX0dqxm)U?YKvmmSa&U zvXAzrrX24&Z&zDf^#ngOY?%!&|KJyydhL10x~tRZdJ?aeX0^;?iSW{S2X00ewy!7& zdQcLaYPMaO-$}TeCLrD5^9!$ZZ6*z*xUB1ngo}P6SZ52^eUunMN1TDH|8-txLO`n&d8#<}Ck!4WoxSP~J3n1V! zf6#7s4Ap(VL(J>DiLvmdkXwJOW$bz56HCT%f$z(ON3d<2d&_IJP8YPL zE9cbf_l(-EV+Xjlt9ME(ZAh7|(#N-s3Js76jdBC#pA+EIxS*ET{I=m015FAChhO5$ zbQ&fVi0|8Ud38^6FB6g$6ISH@Z6TNNrc(%&pihf-A8~cj>0{5HRgvcbny>Ui_W5a5bK+m&o zgq1G8SNe}P$NHJV-g*-m&OLPfqPlNMQ!(mJfV0b97(Ej-FE?aPSJH#`_*XIa!e|HleH3Vl~F`de+K_>e8R;iVi*l&R5 zMyu?K$FGr&Y)%u&1umem*!LmdGH>vz=iz9N_8>7{U!iA!l!TObV()LGMGaa6* ztUm<|!sc8n?iRv z0iVol5acIpuPGL-x3-I>p`-9#b{#WNkXBOZug(MV^hc|hXHX+KV;k_+MGa8%&iGt4 zUpWLLJqOSS*ul~0l0_eW5;ygm1m^}Ah!bb$?eJVvFX-cw?nH3U^Ri4KnBs`vEQF$y zkq{;(vSOc7k-1>>sEVYYHLeY>FLX608RDH%864y!RdzU@unA3bvX}`NG_>W+-}e)b zlX{jp0hdZRQPH8pe%C2;jOyNYVqFvJ)Ck#x#52agFj=}I{mIN~al>jlJBAZy^Jb%I zaz!wAPyCi0PW)u!M~uR!b|<<=Dad3WcTDlQ^iqB&{1yAy^~>*GN7+A<;25yz2ApwA zmer9Z3O-6pfi15exkel7&zsar$7MuFuK@BR$`eBr_K%Od7(0oJFLTUEE$k;tOxnJX z_jQg#g15_G8$I>K=Sdqut2B)s`y_Vp#=*tZ8Lp-xP2-2y1j+MdI&AGb6CD*unQL2g z=$^yxewnb}UZ(mwrJi~fESs!Z#SLiJKcU}BbY6OSqv@RPp~+hZxD5v}4mHOBlb~f5+>4!D~CyZ}ZHVTwOO_RX6|quzzejJsGUO71gdc zXZa3tAywOEyuZAw_yf}_%&KKa*jD|8S?`ck(N!OPCeluolAy={@>OM)KprsO^eel! 
zyZ?CH17V0cfG{;V5|%}tok}tuwymx4?fa6T3e506lIoYuAX=Q6XH}v_ zbu_^a;6;C7hOE>Ov8uuww3}@HZz*x!yzT0 z`R=8G)0@ZMY};wgmk;ypU;3~N3w%n+ka|=%qR8z~M#8^FyY^&?hR-vY9FjurqU{{x zI=$zQUCDYsT`nJI0`b$f*|$F%s(vza41OI)^x3IdGF6`X>*u%WqMf{o>(}<*zCD|S zbWu0>pEcOWpH+R!S<#inJ=lujQD4XOC=w+Pm5w_G{krj#ew>yb_M$$bv45iSEs)7w`KAYn-jbQ;h{4SY9^X{V8x9comj+tPw0L2J!Sq=bbbL z!*0CWc(oyF{FPE>k`JVm_tC|XTW;4;1bG3a*fEahtcRHStOvP973Jza8l@0%7p0}+ z1GQ$svt@C_31(xkvWAi2L3dUBF-g3B?02>*)6B+p&sAO}u|59cna?N-*NX%CwKh4& zxogc})<b5cv4wtfs5+q{wy@l3tY$e?IRJoo(!96rMz#d zlvoHE36rW_xe)o%sz32EYBQY`nacI#d(tR|?$M*#(55U?dNS|;)41+{+b}_RTfLCo zlAgSkvKfOD$0j4Z{(i#p-C>Kxnh?FjFNH5!Vdu5g>=$axpBUvL@=rToR*4gC=CE&g zum7s-cqrkeg8Gc~E%B6E_Yi;%iVI_Lp?U|~{ptLn>)F6}2b5buS+h^hhsDxEK(?nf z+)wFY@yEHnh3)|D9thPP+C6VS6-GRKR>gt;WL`K0o`4-G4Q$F1@FH6Kp+%M*|A)LC z_J$rswv)jjJYkjjNng}W3`S;tMQl8q`AO59Z4_^BcNb{@MGTmE(P5WokRG~6;e0ff zgjCGG=D%S4M%nhIq1Y30=uY=3K>lW^AqJr~&nLPwe23A4y()(?+vRR43n`*`EOu*r z(T1yO^KfZ%bq`gVksov$f9?*=i?P0+5FxLp=hgI>*ZQS__W@sCrQcxYHMl6ADPnci zA-D8}^SK zuo4*OlSvWp!py(C0KkEfr=?Ek)6_91#aAM*a`rqMix5%ydG@y!d(^8%v(>kAww2*B zO{C+X8JlKbgYN>Q&ZWzb7pFq5f|u#n5pp9h$@_Q{M8_>;FL|YNO;{qDBf`Ufj3_@^ zCRJ}na!Gcd%5Oj@hgeCWw^ zj9fZCYP>{`wsg)Yf72%19QnE}dCFmuFd2(iEvU3bq3Q0|r5DU|YS|W=kG<#ZoN4>> zRYR5cAG=He_@ggRXC=h9-6Hv=+d(XvcC}=-`)zu>2V>!FD#iEn8#y%Li(u)|zoO%~ zlGr(SzKx1h0&3KRG~#F<^hjO^Jq)V{c?T$Hy~8S(pfe$~C0MGnRo zcyE|znDo!dUHaVOqLwnDBGQMwTpwzCxsHcEu*gqbaF=71t};Jblw0HRFaUO%3&o62 z+IOYj-hka9jBp!zmUFx-+C9s<$S)*7el(Mywug)okhe8{;HV65` zj4YR1lgju+gH=B=X+&bHgQ`0?+- zvPKwEB$&K)zP~UhGF*&y(ve5w?;wwguAjJ`YLI-qpgl89kcx`N5`n#AXGOGx&U8$^)jvoUY zvGb~PRw>)GU9O%#2BtWjO-npEtbdPu=CGh7swqqTI|a9|&&-_7udzAXSz+r7i-Ilo zl=C>^7^jcZ=6&Y^q#%E*`W_1(5ouV$dYVL-RrMJcAK5X5d$VLXcszE4BZ97nTAwGI zEiol=KQ39qDn322&*HwGhvkheK={^(s-xPBK4e;4P^DEAmt_*D`jAYrp`RV|jfD9O zw9gVCbD++;OHYz(oTI$QeXXS=SFmNaT&Zhvx{S@}?Xu@qyA$X?J_i;6ry5HT0*XhR zccNP%$T5`u3z<1Sk!H_c?JA6KG3~aT6))J^x*nxwlY(l9Q}FT?O~f`do+f;yrK>>1 zma{)dX^mxv-eTag8KNDnB*ez%}1hktDWYyE~7gO>uCjF%OR?q}ohLG!Di 
zh|5p%rA0XB$Q21NJ1qZ)cN>BmBpW`)@!yP$EGdBGnG5mpWg}@m|21E5FPmAy=*08% z?YS^~3{mwL+_2Q6Z|@GJEXp5*_1hhPmUeu2h4#_r@VEJc=Si0ARB`LrhH$3umT+75 zAwjB(2p|<;*W3Qwpqp)Bxz_pgwb8lTx$#n45N7^EK^vZa^tH#wo-sEE;6e37dYm#f z?n?DsjT_9dru}Kxt+#K{{bdwY>DlWvzBmtvc%2 zn@_*F(u=%eS0vPhSWQ{F%>QC43AkJ(7(WHdo)=>frOWRRlezML!Ubet0`?>HmY*gW z@S1`wejxT!3C1N;EhzV>&Xs65ee@QPu)E4uh<}95+(V{HdxV9U$|>(vMq~3MN29de zxMF=lQd!u62ckm!@LaaZa0H*MB{EDJT=6Hjc@venWeM!>>lnK6QX0 zfGbQzy4GO@I~19q{`2Sf7|DX1YJEkIlVQnMzL0nE+~2h0$9j39Ez1t$?Ev)==m zyQCV`YLwoElOt7tt=@i&r?X#3AH{&M9YJ`*@8@W- z{Yi=R*5?~@PejlGu6X?fvG=U7Tka2PFX}uwepq^XWEaH`g|zd{Vi@y{K)K}x*9T*B z`V+GrC_?d7pjx2%&pb5$K#}|}pS1@{?h@^5Ck@}rfEZwCdpWh|Eg zjiPxw8xG9A%afb46xOdc3fe?)wnzCA;Yf{(|CcEy*TZqAXRhk<3Y)zyxliVD-7h?! z#};3!{6YOwG)2-IB)97p$lY{Jpc*cMMUz$Wot)T(fKNGm`A&&X_VP_pN5$jNM9Y`- z8y8cNwxI!CM3*0mr04OE)oa?e#8$m25ti+wI>}6>^3aNt}HVWE=1 zQ&=->y6h3PfnVLLALdCka2Z~sx5&R=3+Pw} zi)aL@Tkn5gM|*5VP8ng5X=43(KO(gb~5s$-|uq|4$X^1@+?9yxvJ zopi4gSU2>2?4F<@WM*!G^WoXbdsyTurIJjftNh1+>O|+&?W#N_x2}N`Va3w1PqkIm z{kZL={`h53USI9NoxT;X^{yqd&^GJJb6%nM>h~cCL(HtruQ?aK8_vdOi{~~@>!~z* z?m((0fcO#bv@3S4d*FZL!>l zY>1eGdjy)Klf=|B3CoKO#_A8Pbw6zKDUShnfarq2 zDotWPR(i*93lLEPetx%R`{1rGUsDO%nhci9wPi=HcIup;!XC%tyRiL=C}KRqX(r^`Hs`G=`@N1*(P4nnY2WF}3bBE18* zJ5#RVXKQs@PxF{BppE7wgZBCUC?38emEs5gnV@(}NoW6jOEKbh%qK%T;-+=%I|0OQ z@cEBn_RI^0tfuQNcbtGbth!*W54?M@#)~PXC;@&*x-@DJ9AU4!-v?r%ZsAE+TYc{O zRbA=mzFp$&GcH*M$hixal4Pba3F4J(!-BxzUb1>@3zz&_I5wP-nSgm%UQsv7L1rD< zv)yIqLO?pogrm&#rkx~R_EP?L>0j3rnY8PgSL`y2+C{2TFI4-kU^SxS(%lZv8xZef zQe*o9`pKTB!Hqxp>(4(K?Ia#fvYud&LBEDuy&CuL@%z|CmvLX)xVab#)p)9B|C#u+ zqyPGdY%|4`f3WZ~%TL~JX5V5%i=W*5Eq^HpkfuRImd}w+%CRRI)m`E`e1MSGSVnJi ztNP8-smsro&EdCwnkj#tUEd!4Y)OY8;*LtIu)*Qb+X|a2~YLSYlhe<|!Mcs%r=mf47Cp8ZG!CZ&q7{CK&gasdBJwCHN zQFu`uQCd@x(CD$I(VQ`S?z9#685pU{biIDQC71KXs~VyvK~Y)QCgq*pxrcqYjkRy- za!*LKAK^Aj^Fg&UEOzqI;osoi-#UZ)^KruQVXv~&wZi+c$Sf=%t10skl@s;I{3Cxf zhkn{KZY(z=H1lUyS6<}*2@Yoarbwx=hE9N4mU>B1$Ku^09{b>v{)hi}8~Gb;E=Jxb zDRKIZN^qe#H6Ts-0y>a zT@YZTQa$@ef47ey6zMn!kAEFfnXt>S4Z?QFsq4 
zZ6{)x8SRjFvvUtIw0vDj%hQVlVXQv(mwZs6TA`1LeT!W}Kc7l7ZGVnc>aI7~B!+=A z-XvI=kRkpAAHG}27Hpuf!0Uc#pWf`AGY}S^EbWsQofQkb;0!&JwyO$WCwI|t^_;A1 zs*41S5k8DfY1~{P0VI*HUHkSh7~?7@52pC~pK$Gi>g6sGM49L)Rx9MsIMm!LU>y?1 zjMi;S_yHJ!jVXniJ`k=}gL=5GiSn^4DdCi+O|LEJ3--C!yJu~LSuoquLqdByKF*xX z6h<2mEkD+H?QOaV!z5q`#)gnCisKm_K)r9D1{encmynm^LQt(&bIx;Yuzkb}&>`tK zH4|CtAK*?N#t*mvTKDCT!BnVfK^6z7laqAt2JdAWaz8qqpzIWUSewq4K1q%xY~Y4-W9Wih2y`W+eusPKf!NN=C|Y@A;ri(Rq;m*eP% zh^i<^@`Ub5+nm*l?k(OA`PakjSSR>5_&J3VZ})1F=J}+fx&-)Q*UmvXdCSq>?rAeG zCoE}{Y3u<5E)9aXmt2Z3zbfAb{`t`nS-jt?z%LtcE~Os8S-(4nkOgl-vYDvlCDD&? zHLujaPyf4nVcuC_HwL0t_$bJ=8V~^lM~V11?AGaCg0H&IGo5-SCCg|7I`kTAc~fG7 zgWs+yn<_k&jLHTMBF6 zma>l1o%wsCyf%!v9;}w(j+2<7I18Lqwwpg0)H|4vqHUOYb9uFUSzW*_ zD%iZU6VjZ8%&nt>q^O4xE^6h6%#Q>(4BFp*K!{L4*d_wQ3)bj%w2PyWHG5ezQm0c7 zVah_hf4alQeAY$16DBIC_j(tT66hvV4soc>ak(q8%eM<3*NyervcX(M)Ax<EB}_oaSfdqPqLU_vM6;`~9prLhpL9Fbf*~ zTe-MBr+0#duL zONgI?S|D6dTynyiw!ZNh`DKP)pM&DqmV33V|4`*fM+U$ub=otuMaV~8j^N^V;MO0z zwD*vOx=EnJeJ*@fZIi{bIW=Oh+saQ9@Pv(!yRqrKS_qqH@lm`aa`Xzu?qg&e#*#^{ zGcODXUc*^CHX=9X;fLLmAvj*@Hv1^Vg8loY?<__QScZ+Jd^mcy7Tr(i>28u#v-0s~ z`t*{XZ;r)g{R0@x{{i~({r6=o)11L8bm(_*1%cOMb1IA>y=?;oI8BW zQTnmfgdLl1&PzAOt~}x^od+Lx#}<)s@gQ%O_1rd8?6AFCzyWXna{`gu`dfGGQD+%@ zGGa2+8~yE{e!b!{<3XX=5>LVp-Cwirgiz=I@T|>!kMf{-EQM;+S2tLPJu6#Y)+RpA zxI71!uxLB*U(a2y{WaLRj(=%y)3japzK2NxKa4kpYBU3A*`QKk+nR3nOW$L!oh^un zZyu$0<0k3?#8mvw+*2is`5DMt6@Vc!<$x)S5Y$VRWK{026Snr}r(?7oP*Y4Ud(|-r z7JMxfZ%BEHPB9m7**%oSDn^`^%m^?ZMmF(zhg_6uDDmr?%+D*A*6e#X@**FY)EIOl zbqHnp=ud^)`5W@3i|;!@;YeDhjj58oTBLSO#|QjxJ`s%pE4m{IlC1NN{zP=~3ssb*|)dfd?!8aG|wqTG0{dsuI>3HKx-96sD)3GVGoA zK_?$z$qn#ObCSgwTle!J%1WdrVHe;!c-VI-_Nd7s={8zImjT%F?HfYo5MJ2ac|!>V zU7d^XMRmXT?L%F`4tU#cEt22~U9ptEHaFnP8%43~v$hx_+8>#vTuzC5BsjJ5sh{5S zy(8cr5A=$R0(=cqbCQ9x5Di>nfjRJt)!e5g?>K9L_W8jWWap1=Kl0=qts>pq=?mLp zHXn<%ihX^hnzce-hxS7N$#+7xVNs$6ISDbM7VYG`|4f6u%LYK_N}QG;8oIb)8~QLWHKxESBz1l}&{6SE7%)pcYaaB~S#J&@W63U#jf{xKtYvD-u9|7~ zPMOASzKwj{7jwAYNiqT#JUKki^0qD*2~4X=&#n|%yMA7SA1D-@+akKsV^2Hp``qbu 
z7ZQhnLN2y0-=55nxf><5ZM3YxYML3KK)h^p(^~{h6Ml5(g}!L`K%A(+Ij=3gS|DydmhPng2(Mm&gdFkUcGl zshy%FG2euTW9bw0G%SKhV4u==9$o~7Nb1_QB*&+f3`_sq@23$bjuo{pvQ0>QW+uUN zryJBERX#CIFwTQHNLq;bflVTq1uR51dtoRXK*ibmPFGe!ZY1nyb8idVOs2XpPV(2- zq*T60OM;Zn_b3~=MiXZ&)~L|fCV|YTUo>C>aUl2Oay^7%R1cTtQ%<&rWVkrrxP9U3 z&aG(6?Sqd#$>od?aM@^o*8|N7aL9Jty2;6o^KHc3RNcuZv*u~z=wDDJ+=&xGya#>G zEbIO3pgOK=t*|;9W2v=L!Fr0X6_&fkV%aax+bh)`Tb!500&r^qoAyAo;$~L1A(yj#_g32u)t8yNNsh&cQsOTcaKN zm3N#AVzC%7!*c*+3Xe<;7)70%%kZM;CUgixq~SWP>?@7(oH|5E3Y0=EkWw%*ACjpC zuH3`TQHm95e#q!IYoIWk&j5fi)YP#7=`!%o~Ic4ud+$_1;H7Seb8w z(0)|QeVL9kw7#sP`1U=PG=H>hb_*viqvBab$g3W`M@eyg7GxZViMaonp;tVc0elM*RiT1C z{p+h>8pZ~c6(8FpG^8HuIoSNSZ!mnYoBeAq?I!{VN@1DBb3yk)pZYWrVoijr<@MH~ zN*eu*3ftbs%-&jY$s_Ol!QMsY^GHX)8RMS&831Eq_Bf?`LuL_Kty1wz2TO7*`jJ~O z_Dp3M+k=8JvRNTuqL=UybWRo+Ik?e)F(#+HWkgvRjT+3SSX@?wpY;3ePL`MW_;wPb zvnYH*P;hLzX2CkYreS@g-s&d?ZneeTZAr9l|B^zgiFThY;FtnAR?Pj*hx`N4n>0Yk zKZb*OzeiA+nb~-$sn-3kIzJmn%58`BT*x;vk4jBIrad_J&WU~Y{#=3^`{CSXrIq5H z^v~hMvNJa}mIeab3{yxuF|9=0#&5@@!rMBTlAcve6Y*fMKjqw^%ahqCc86Dg6`CW&PGNa^#TMs7&w!eY=x+*J>CNOreF zGu*=MZ~`{l``rZ_I*Q+K{;YOTc!4VT38DYsogJ9bTv+cB2SAJrLCP|4n>A1-9>I}V zo8O}hC%)vE(=yd1$)o*AdUO{`%z&Ut@C7N0M%QGs@Q9BqiizN+i5`%>DC6AFAKQm( z5(Pa2#(X(zS#i?>*Ko)Ixu{0!r0>z-xZ6!95px{G{6{I%_`Y+d91V;-fZ=c|NLYpJ z37Pf%YTi@CyR?(2p)RVYqlwi;Jf=n_hFl>#Uj;z+x#~WLhWzyY0JYTFw_NcKb%8)i zrWW04-fwXyF(L9OwHl8|U#obSdYyCg>~P|TP`q_vod`cw4g)|QG(H8e^LQyx87+7% zrr3AID##+UPMToQ`;a8)ABPhNvL?;fo~TPHRTa^2e8W>HP&clROJWXO4<|xN&dO7d z<1*#tXFPXKH~c8Wgfkp!F2AhU=3-5p5#qyx0~hd-Xb>dBLr!z)lE!1@@U>gVw;IE zk(b}#VU_Q5>X8xls!%2)g(Hvj27S*pItC0BJN0_^L*CZP=i{hQn*8*;(8a1GKF;s@ z&h28tPZ`sPemi))p_JBA=U9USI@eXLL=(e|P^1Z$2zlq7Ks)6B<2XJ*F*Fbsw$^fXR5!(xUvSi^yzW}h?*5`OL z__3Yn2V<4;(FBk8dC{?#^rn7bA$GnRT~!Rd;vX!b!h)yDh&K0bTfG!A{_pKm2a$Cp zFC6HxLpG=zMyH)ed2w)*uELEzpW`XEH?vR%HAnQ>gDQIdC3g=W6|BQg4qr_iYBFUe z$<1U%ir66GZ`GKM1Wzb0#+lRV%G==n>Z>plF~kR7nG}8q{6N+q$9M5%QdVF)0W!6Q zY4SmiC|X*Z5yRGy-s(7DGtw1Ji8>TIpstSFB#7@^un4QzR7LTV)ZRPVDyP{r=mdv> 
zH{OQK)9ZZ%?Fnw9DPR~f(a^2L0~}E0`i&!CH$)KugR<##;Ts>Mh{a5z10)Ha0@nYR z+2bcmO@9IzW#YDuiVq6egjUFL8rR7r&Mi(Ri=^k3^QP>o_E9{Kc&|1Jm@pp-PqzPC z(LZZMvX6LVG9S!p+6|I6aYMwpA-54;mhAv#Bgn?r1Kv`AeJu&&B&4r`Js>j;U+f0E4g;x?(tGu%o2wh*icvboB=~>k9); zwCxFsB|b%TU3p8ul(tvVy_Ixec2JzJ;CG;N9pa@AZe*DWKd?fW+D4)^kh%l18c<%H zKRvuKcSLhb9Dvu?+_rWH=~CYkZ3=vV$!wOx$zuMl3}+zaoqDK>J32Qy@y}+PpT}YD zQN-0{!C1JYHDFJ?{jfxmz#I_n@H_ESd&DS^7@%14!FZ8t%0;X^3{%KX6~ia`gP8T! zqrRUf5AKb=dT!1}rH};DW2G>+4i9Ir3qvnL52%W2Z z$G2^tAm(g9Y5TIqmc#36z2q$lOS5eYuHz}?(;aGETBKTpB~ zamxG}BBx%DB+oZC(#GSqTles9rTUt=-{qH37F|n-LRbuoMzsl;EL~C}IQmQ`*ODLg zXUgz-qzQ9SnTEZsCSSzl$>TmCbVmjILo++>$~1Z1J9yXC$!5RV27LANq3wUVT?1ae z9&)My&gfgT9NCIE#$jhE)v~?)QgfqDOPkIM-~WkO{KY92{X{etQ(pNS zbig>=2(-_CF&wzK-Gbl{DE!+hJQ=BSrVeZb={O#(%>M0Ln$V zilD^YR09WYGp9xX$T_)MNvc^m&Om^Z6`a!e@olF~t`(|`tQMy-^_$(e5PXr!6Qawg zatokk?NILYNT?oZ9QkL*`*%tvcN-Uh*pd34Y~cgau%A1Cu+&M(zW|Y7F1gIY>!->z z@8>+U?{hp)Kk3~>;<9-WfQn(6IX)7#k>1QV+WZIN@Neu1#h4#%vQ)MauP_Tsw_1SI zmb6gcw01@R|EmVwIHl*HdF5s#->?&ye>V01rg*4v;eOXDv$kti{j1OY+b90xkIHU; z%WN>!W##^R3;yRnQ1tw*Td-Qi%>TPKf5{u9zxAhmqG&{C|9=xjz@(IGj8j_!NdnbS zs9L@PXI7Oq6~KW0uj0VauJy4uYBQYG{TC}zY6hT6=437TS-h?;fWJioKHIn7Vw0QU zE588~=nli2+^npBRC_1WKl`(1>qbvV%9W~W^sY*oVqb&BOu0j5WmeI@ZmeFKomT7RWdA?Q^bNjY$@#Yr&At9Rcn<8X#aaLF?@^dVKe!0V`O}H< zf7bTjf1~~k7k*P==eYaZemI56mDuOb;2=u~k;r`EyNL}T2cd9-saV9Bw}Vry(LMS0 zJ9Wd?6JIxD<{uRnbb;F)%Q;Ku0s_NhckrN1{)@Z-q@kz4@ za!wnabl0E2qiChppl2C4WtLA5sk$D}Y_`kW>(1sV@}xWW8U1YAp2Bx_-fsLW9*VeB z{hc&HgJy#TaIAL=Ya{5+I5_-3?E9pUonZ!02kjR?rPFLy+ui$v|Iv07L*0uFmeTb; z70Wo(@8K+FD-U>Fj@1cNGzejh{jg(O9QRsXP8M76I4m&inCMgqKrlwWQ^m3o1w0ji z1Td0VVD-b;NHzy7d1Y$&#vtUX-;Ie<_=hr^C$QLXzucy$xcr7$VGstt2qZ7*lzh#) zBr+k>F}kLR?6xW_(I2UX0Zo^y4BzZe^fXv6HKJu!W#t1EOQe9VX}=7&oGw++;QSeF z4Kg;#RjE+*k+%bOX$Cb%StC;3!ZnFS6CAX2& z@7H4fYuNnzZ1oS|%0GY==r94SCOiM$L;nB8E}4Nsd`|P>{~9p=i|!H>`uofhO8f5c zpH-;g8fWSm%RT9bm6?`nAOO^<}48HmOBOoXFkSgx{Oj^&h2% z68(3EQ`(l7msza?3B3rRfpM5ofLkB)zkx zsZIynO|Ap$|H%!~(n9aXY5LSu;wBHBHApUf2dtG@Ph6JNJwWHAj({I@%Iez_iU0mE 
zb6jR4F-=V=?=67SS2-N0hsOKolc!(s-k{r%J}Jl2fPv-z9F_V7hTVM0oaMtNQ5^qK zh*}j+EQ)sakh6RUP+^7tkQbA!?rq;2zH7Wsrnc}`7#o_Q_4gj+qEZ7k=tmg|s62O6@Q2kz^qz7{~C-K3kAEEdDq^W!-u z+S?YFsH>gm;Uh=oHi#f$ncOw>Mdp&#E|GVrqpT}Fk?nDm_SYfTP^;xk>E>#i!H=l# zC$szQz`RVuMBMldq>KbtuBHmBsph&v!`pH=LwA4-ORrpw~L8usrzL(%Wor%uaaQDyjl_gz{}=-oQyx|*8*p6UO21wnq$`T)1Gn+nP1 zIHEB6f53qfjeL}ZzSLgy>kRJgsFYr)<~#ol2*%QOnfK5^vJrc)*mmBj=IsSB1PLHA z1d-&J<>l12*oDcfQKMXj;oZ(^jW*bv5d1(}YSfu4zu3z1+z2wuYOiVxPhOjjESlEv z@;s|L>Vb_1)P(I}=vkiIANGLc_2+4&tfR%Gs<=q}63hGu#iDRW(=5zJ9xm@zM>&BG zn~ra?d!wJ#2q-0er*( z?pTh}o+Dro zoAG#Ly4X0|u=Gr-2B3O?lErm7{vJJk=P%9oc(yUeHq&BwVa4_Qn3C`ahCPgwZr!TV z=zxsizm{ciZ8#E|GR+weKWon%HpI7$JpF}@Hs$9^S8S>mW<#*KV~~7u*ryp%FQY6c zZgA8pBr}Y&8f)#@A!&GK)}qlm1(`86${9Cw?e z33)hz~kTn>L1(N%j zE*aQ0OkK3b5AuGz$U>!_I)4td-&N_+vLnKx9pDt5A^w0(+(`h72`lW(>1BCtBJQbH z+MVjXD(Yy^dWzZN)pS)~&4vNS@}tgYou{S4P620!FnW`SHU;QU314fOFBF{H+TN04 zi#8~gNGzOwXcP1>Zte8W!spj!BJ?;C+_0ImS-I$$#QAWYjgZa|aBb#H7o)YK{UbJm za^{{lt7{7Q;j7B3l7vs`MPG?I{nhuaJO-EYZYflvCgbT(C@zwd7a2D#GwSh9!&m^= zI7|Lbcm}k{D973@wH_JLOyNfq&%TzoJ}B}hCd2WGVi1&tpwwgl|`q`2yfZJs2)E*n5}xALxVc((qhQB5?XBhayxbuQWnT%Mr4Li&#d|)DkIHh|pC)T%X0TGQCYo59 zHu4mxCa%9CmU~hO?VT(l;(+993NbiPjqHDllRzrLU|q-k<*i=`6`&}{lwh$!O2=$ zq|^Hprpf9BW6o*Vd%5&Qt$CI=^SF`ce784mu2oN^>BO!va|W!U#_3%t05eiwEyKfQab}Yct?&G~3&X!WT}pZ15I}2f8sf2Fx22 z2DPG6JuZac;He&3gkp)?hxNQc__oMBl|*NJN3AQo*%xV_g@Cy4el6gdb52+=8xszB zLq)e$EBk|Sl}C}60G8pT^62U4f#%~EdCyjwfAwI z^AOFzQ_0tJ-7J!RN)YesEMoqbMg*$VB7=%A>$k0f)C0$_ag5_xt%8x?S>q!nkc`lX zS%rUzF2D*rZ)>M89?^;&tTF}VUg0xDMhYB-@90e4KQfyH-pMjI{k}k2jii7U^Uz%K(eF80_Y(CdR%kzI~oRvBORL@~_z-K_C;Gr#N zjaxuNa`=>rJVoP9hs-ab^C6}}HKR>~=mTAHGXMmZRuiq(f+x)Gd{O=^9X1~C`rz87 zReaii+jP{DV>bPxk&DM1Fyjheh57$4!jklbD;%I-w(#P>k}N$~f7oE!8Wp z|DQ2?h^la7dqk3N+4$G-t;Gws;!ztYZ)zzw+mBh}HIrP$2{P_D3Hu2l2-kbpTeisJ zh?#0~2Bm@>AxPIPCRIY_d-YH6hUX*Vqv_(=l5JMOQ=w!Wi}{juR@)wtGu28IJNmc<#sp*Y2CKjynu~)iNuPk zZo5VsR?A{QGIE3K&T5lx?3UAS4rEGNMnhH)zD0RR^Nhg$l^@#!kW%r@Z+8Nh&Gk;* 
z8bs^b`b}o}2EI%rxKQ*RuJjRk1)@cbg{5;0+a*Nz%U@J0z_P99(BlB_rqW(y+yi#f zNtm*oT8kuMuj!&hVCVmZs& z_9SRn3=XFcM+ImR&H*?{FV*Nzx>TW-pV1x>@15K8?RH-8_sp)HIHXbvTM^ntutUAy zo_qw$rEd_I-H@$BEQs}h$A*n$V@In`m9zoKcgcX|WPHHx=g-11fhnyo7JGzE% zPhE87%Hvyd^=DR?LULp8HPkLYcfuqlNJP&@^qOoi$ev zd_GMsT@IjWwTp+#I6LMcbO>2`sP%mEIPChSMZC-NPiRp7IJT9c_0H)QM;AG3Y0Gx4 zCEK*)td-y~RqNvM%b8B!?b9WVk)jW-dQr&8m>YJp0gPV)%(eqxfA4LJ4vQxY>Bv zvM_+p!P2JE(3%G&Xg?rGQt6-%z;cl zk#TxJ-a5?m06b*SQ%dw;-p~K`nI6)Oa-%xT>La*Rx@D!Kycn)80i3cnp-)}RkFKM{ zS7j-L?RzO5f!ryQiK@~M^eT`Aka7?2$7wD@SFBdTbN zdYWC^S*ee&J_5(Lm}HlAEHxSxDzgEN-c&P|j)2EksP{Dkpe8;9g#+3nY-;z{W^*$Sk411FJ-GYfZRy|&MV42yBOTb}v zf38uj#w*L?h$tOi89)U)$j4@{b5X9*d^>Xfi|$J>%3qE@0`-uDO^T=Wd5+~G>FRy~q`#^fl z7?icJ+WK8ym8Q)5@T|NYg13Eb5$74Gg)vy!QkCl$fu<-0$pEIOv34F9A3lc~V2UPX z8@lF4D#edPZF4W9HszU%t-XQEKv;~vZ5sFyp^T7MNKE;{4Jn-Fp1L0=t~6m{aR(6V z@&M=xWjjs$*9|g49isGbQ}^^UnW)*aZ=Uy z`Vm=0k!6FxhG!n;8vXpX^Fyj#;i!)5DK9Cl?p^Pu(~FCgE4Dg(GmG0l(qWBV9yQ=M z{5iVW#st%6-ga!hB`7Is(480}bn03odSQvG9%hL#lftWs=NVaM(mClr>3~N&`p79% z<-xG?;M2;bTK8GyuxWMGpM=e19KuFZUm`p9qumgvco(*ht=DXjNnl!QVXDvLM>`NN z$Psljb_rvp#kjBEy$j^}$=x;qX15ia&6!$^i>ed&AD)M@+CJOod;H!#NML`z{+LTp zbcPfK2262;afwtoU{@WFz)`r9@C{j~VT=^lWqD?ee5-x#i7r^ee>gen$Oddv)!%!c z$M<@;OAvXpU!MP50?d~RuqI+AQdlJdY6&90P9*OOu%w12VSt1eZjBCkZrH5f8NFAU zOrS4RHH31aQ1C&BMK3a40k09k4mWf$YO`j$yux(1^aDeX&Z+?wmr7yo=BIgdnM zId1XX3L*-UAFw{4*5GJjs7VQZEt=s)4!Vx&IuBTQar^R3tgo-DRhrx@RaVRcRpS`V zAI1pJZrk@Q&lpznkLq-}%$n~RZFa|On-$JJ)Ab{!+ydrAGgh>}^CMdK;a*ltg}x_*;3(wej1519FI zcV#?Rya6fKyxFUr$cC~n!ZSf$SBxG#rhw{`xSeXpGt}SN-YzY8we<%{Yn5XU*s0^( zgIh$=B1PROIST*CAta7Bl%=3>re_ zpV?gybkmSYCwtm+xk#?pK7-3Dd&!la6T2ku+PZxZyq`3DXddR&5p}H5{XLja_jz7v znVWC&?V1F+Tmt1q*G1n5spWbR`p@-!fJ8w=g~$p1ql^_F&N%z<2=OHcT)>N7SN0;l zTt+w!5MoEQJIF9LtP*<@;&cS~BLq(7g#^!jd8`$C&RhK`!fq*ZLOR;3_kxyWGM!ZB zkuG}p&ZxI&T-nPu_92{f0Uks+l-^FtdUD?)Mx7s81GZ0WU@6w*jNnu+|!0@WY zEzGTx`ca#78)+~Lcs7qfadp4h_?Q<+(zl{l*pqlGD;TEc<*NUYQhRwgNH1s!cD0)y z{rgPUkM`cxmo_fihLsbjmIB*@;z+k4PaJbOriMf7U2zcAd(X)Mj#Qq^gqgux?e~Qw 
z)oWd(qDMYw#jC<~De^D<-git*RzHn%{s!he^` z3wSOJze5+{*b5ONcxdGJerN{VtXr4st19un%bjf`xL*? zuLoVAgHXWGLBJ@d?%}m`tJ#gaDYlD_m9>#Hc2SiQC}kWn=-Q_?9W^+%8C{cx8G`V(0}~&X#z4`ra%nDDaXapxWOkxsIg2f z5#^&uO87jo=#$M%$`PBUyYI{Oyc|qEkft0s2t})PiR+&>8yo<(9p>lPOAf7a@4wW^ zwYp#xj@q@ON$M$9l^}>CG9fg;Y|{xngv2EHJ^19?wb_NnGRQ5Q^1M)NYZqZ7$syGs zE;~`%y!m)r zr`Tkob_O_%Dy?-TT$ugJyr}*Yi1#X27*$oASMT zuk76u?S+^yIknh?y=)FIM-*d0OX(4jG4K6xk4?%!! z_B@wn>~9P?u+E_QZn#{EA<%1s$C=My#4dNxgCbDL99)#fW;)bu9|t1fZOw9B)i|9g zvz@ZX;NTiTT-0g67Q^Zsvn_I(=Uk;MmYEYL(R|b2HVo#xyv)InL{Y!H@PA z>I2j%4Zdp2 zBgO3wd2EFE0$7Y87AWkdgxsquBpR+kY6GpmxZi%f`bK7H3O*$ltr8ciKhlMLK8Dep50q9^K!EvtB${=WDCdX++uO(oJoXeQodmvD9KR{2*juIkMNiA0;JhH+1Rc|{Vbg-#%Pp%UF zJ8L)r;|#)v@!PE?Q9E1b;7QEHpb3ZZ{HEHxy3`O>UET1yBW-1vUgMOs22ja5Z^7&*?`(zUk zROD5Zqyb^6ixT}zqyqY2flLRkgLFXYxS;r7wdky?KZ9Qpib&{EUaAkwYFhgrh87n& zDvZtUmx&u5DBpR7^9OKE21x2reD=dR1~z7g=H#f~Z^upYmOt{0i{KCp*L;B#YA4wnQ|`?hAd)Hoidt12zs$XZ~M%8mDxKj$l59ZMNZlm7L#7IF=~)( z^KMrSdT%Y*Frt?btB#lzFN2nw{tc8p6~Vn1s*;#S54Hs}--UAqlytPzH!^Bv4Bl5` z<(uC75K?m`jFDjB8}(CjO z!sq?e>=f>#SK!b&u{Gok=a4-d&j4lwe9d|DmA>5V9p%P~6->BKvjp-Z1U zQA2xmaNNOgG%%h=T>5bmG7-WpfsoxvAizDdw_VA`8YkB!ttb7)1|)vnA`@eZiWo!D zc^jS!FV84rJ#ZT~;GjL(z^0XT;Vo@^AMvHiC@DB&lZ2s*NrLzykWc8NWx>rHEV-60 z!%i3VQ$zq7MlWFLxWBst=c6j2?f44)i9Ak@RdQZ>|F@KIxJP@^V$RhDJh*d0tyx{L z9QjFo@HMA9W=A~_1BQuw6Eo^{zkhq;j#N8!v2($N%Cjarjw$oFH*(j($f)E|!THyeK_oZl4> zTP^OuQ#7+BD&T*j*9#Pp_gZSQJxfs#Rv2OleO5x> zj997-R3!K}#16(2Wym(-ypb45&5u13KiLr`h6Tf-_;DsB<6X8UP8!d;u+kUYyk}Pr zTc}lYl7YBGjHk$T!L~f{HXrph$8WqE0Pn!Kkk7)H8+Lg=n7;^)qGKUv;$H^WUO|#< zfM}z`Kz~DOBwgvqbTr?OZWt`#X&LxYv2Z;JHlj>!$SfX1?A(b87_xksv8dIeE^24E zqJ-212C5OOde3TGIAx!A63vXU`Y@QN#rQ9ZA-bORjHpxFfWpoCcq(MEK#sOxom8A4 zZ(MlH(#|g0sW6b;TLe2yj-!htAoT3ZvEE{z855D(Y85@ckoLH1nMVGVYm2b;h5*#0 zG!9ie13elXVrJ9g0ki>!e0aWFk)5VtOSNKU@+XL~zZc_>i?EzQ^?6ixCj!7pg}p~_ z{)pP>O9e6#C4G5vYNwGp!{9-jPAhtmO=my9dZYq*NE8HBtWdt=nxkH&Ta+weyD}wy zlR;xL)#*MP(>sFu=+Cu2eF2xh<=xV+b$qnz>+l20R<}~2b=3iRZF)c{{b6O;W=y&( 
z4sHHtN(TG2KssYnPA4}uF76?_#aR=ZvrTfOu=L*Z?wsk%{1|acoWv#*tbqlgP*2nH zYd^rc2;A@v&}H`qmrqqDs68YzWfn8_W$s!0qWG-U>6o!sR>4+Ok7 zuIt)n=`~JU#v?Xw<#}u}%zpe$QWj>vywJ`Ue|RE0H!=fMQetR~8+~RTVS&vv%=#!( z-V3NOJqhALCtH{vRBwi*(vQ9(T;=5Wz1tHq|J%vhL0KYktzBAus7K_+;}} z9_sqBhf7UNqTb$KB!Q_xeb%KH@YKP+LTZp^a5u-?I}u#nwS9Ouwmf{RP^Y<3zU*Se zDDWcGq-dyzQCI6~mz6tT>n?&Xf;AXV7?mjR9`WQC`N1Wl@`e%Y;wrO=c=Sj?7JtIk zR-6tIxek5)I3t|iIUbUI#AYB%p3Htm5081{GeU9HTeA(w4ZJZkToE2vxa0f4LrptN zY_DK65sKtT#zdcDpV)AtfzL4_c}^$4RZZt>dM8s<*d(Y_8ZP)ZR1G@Er4Zs7Y~?T? zGz9auC@ZN7hYl9sO#b?{h_IwMpAP zF*f~1Q$k>ioa`rACbBPdoWJR2)Ww+$y4WD?=?H@JuS(QBPvLp`Zqzy z#D+gKyhrR9^_f8k;cm&!g|NJ;P+N#Ok@8b(e@f4dToXeVpIBi}2kIR{|3J_r*bPxI zZ})8|E1?fpo|TZ|>XkUc#F10|z+0*DkEl)oj!%l$XJi!X4hA7+2|27>Qo{!F?+FWa zNSV4Gj!LzCH{^uJnQDo(NlBIvs)DXGBnLlA$;r~ksD?uk!r;RxD(%y2Q-CmQc}YeJ zdhrXQ)7l4hoV~B_4MDsT=Q0o+5&dzh?9fId&?eg;g2_2(vg#m6u)v_OL5-0*O)uRDh6`b8KQ^7T^nKwsve1ZBZeL$A^3k<~!3=rNjkh1Ih$~I}t}&c!dK zCkccyDT7?5v7Js|;367UQV_8nh^tmeVqY`I!WNgMhv$$s+;Gx>H$+01Xcf`Hk{93< zW4EiH71;MycFWT5{V9SlcWF<&b{MD~-?rdV(#+tYmk?>$OXaM_n08bg?bVlp!f}qc zZqabq8hm05UORFSu;98Xne%c1T$QG8SDr2*Wor*L9pL9;pGV;Mq|oPRdroVhAQLWx!1Y z^ne^(L{fi5I3gr!t5Md%UU}6n<^vI?Hz;=m9IT%lM`}@Sy{4fNfWl`Y*s3X=fgY5XPr`);>o${G-iVRm_l3_gzl9Vp!bwSAdWzdi0c{I3_G<~80wqMt&8|VYMWTctG zjmv*fJ7XU4H>luCP%T^rOeWcM<=fB=T@+rCV@^2@PzZDp%d|d+8)z|j-!o)@%k0Zc z%4aH%1bKedj5~Y6 zNUG4y;{T%Tx+>+s_TE!ETXw`46fuHDupe}5&oO#XHPjb0d7o%y(B?C$KjNWJ1b}ax zYg~P%JYLzU_XK|YPBJl8ZcHrPRuh1ASJ9>3MxtK10h656pI6sP4Pp+)#B>_A4bFB} zCBFV(7Sbm`K%qH)H#o!?rTq?b+-LxPbC%+oNQ^p22w^^IojXrFPB)P>P>HV+=`6gZ z#UceS`Kx;C-v`wPSZ^G9s2`nWQEt z2ykNvQV^QiG&!RaH~(T^{)a?rq(ZSMos549>ruDDO{(i)K~1H=oxxjhxYjqMv>LMyOKk0{2 zfBtr@CL75f#k_8BfZmYK;p=NoKnQx2R)*%%s7%3#KS47Ec6ER;rPiOH-|gJ z1Ft7F`qnr}vEyp2q5zj(NGwVjG$#Gf0C_cf&`X6%H9s5Y3dX2j$1ETX?sUuf<)A*M zOuV#^bCtpG-MR;z-E;qK$+n|C(GCA-XDJxXP=5)ne6DJ*~dS zuo!n{^;lXQ?b!2nlzThB^#crWv)n;vCKgCM5m{(pYpNg8C@-g=Sp7ka0|?1H4RLaI zB*ZNpx^?E~mg4v}X#Nr59w&OeU$*h5yW=B@^n0BY z#5u7}oWh>qi#^H{dZ_1cUT@nE^F|N?qo0%RIC4<}`9O;0pH- 
zU?`ye;OhF~yW)L9G@^aX&(_tGky5{X(Msi#xx(Xk&1-qm&dJ{#Nly!>Cx6Lq$eder z&BD_fcHIDF!F;PjiyYzQCV!kQ9N1nc;Ts~Z1sa$n^n7>l) z{7G+SQn@6LhP^hWNQI@qJ5g|{h|Z?u8+KJ$=As$+>{`!v5{ciU!Hy=FD=YLV!;ll;1HLILVb6 zx=NrkwBKG!&8sP&>4a@ry;4;|YqL_eb&BMCQg!_(+I7-mlN}o z4M36{AGe-dZ^fOgg%az^Ut|5ni*2&_y^yK?3*>8{;vOD}$R{MwsTJzcW>^^*Q0G

ZGPtS|y9U|{BjX2u{)`qK*A^Vx7 zHunpT^rTSPB(oMLXAqZai$@_J_OIdVf6`80i~z#K{!n?0*^B@CJig+$>3jL0K=dP@ zr~3*6^%fxZfRI{3{rTInHiQwTPr?Xb|MT?%G|P9^Em^M_#&iIk*j~V4`kz=yY}{Lf zkCCwqk@{Xk>^haqdwKQ&O2@m+0oN>E^$cb-LjkPSc7N9GE58!%?>{{(hxuVU$z;x$ zq{ZtgCzMEYRrGr*;s`q`L8@#9Z+#d09$7wbai4xKdmuo96{)+%!NhA25t@1ygVgTuN*8N{Ego~qIZ=n)`!lyI;Ib2^K#b1Rbkbv`hT-yCd)n_~5 zG80P`40k5}HL(7E=6_d&P|6>Vem$en`@cN8&MPU zmBs(o{*nF@uZ8_zZzx+rE%^1tQ>(@}>=o4=q9+{>XVPu^&7jxu-ec%G%D>#m&(!Jg zceTw(I**1LSDE}C+jjtfoTv(#&R%5hx?s1Qp`UHC-*9l0aUtD`@E?q);=63j`4E~^ z1wVXVrUB4;RcixrGOQI<6y^OvV#_YbpCMC`4DqdqjXWDrzG&`;OMy2 z6NWzg+?$X?2h*nv+`*xYR*O>!mkZ8G?+vB#9GZCj(cbdq2yKt&!D=$lyT#as>2s&f>)0 zm-og-=Cfb@fphd0Y+bh>gg&=xm7jm5TBgkBRw0A)@bK8|WVpI)Z9Q0eH&GAPW%=Bf zaOJ)VD=9Usf|()|;sT)%9r}rVy@Gdgn_zKR41*q?A5P$PBJb2`*;TOq0^0xQApYYM z$q=wY=34#k_;b4a&u;a))aATVcsV6CU;caV!vR+Sx*vu~_y3s7|Fb~2UqESaGKr%9 zo!aff`NArnUj`CoYk0~v%?40D(X5iKrV)h8{b|dLNllS|P1XKarhd}~(lvqpz4yziH(@oM_K1z}% zrjFJ9B@R27T9ATv#7+2DreCxC`zotFm3(_C%T#$>pYtXo=pI-zPQXn$X(z85ho}H> z@+&`VpAe*RHn4tSo|d8~w}I;I66w?y4_sO4Whw_|tNQh4z|39$m6HHa0?^wX&+2u@ zSxY7O+e+2d)hOVE6s#xOW}IaXosu5MPAQ4{yti7MARDbMH&xHmEU58;RehlV=foT# zdIf|2vvd7f;r`BsU1IbV=+;SpSQ`H#)wA1}ziVTk$Dam|$^6!VsZr#UgO{E+oLDpv z;eQ(jPyVR1rjk=&=P$1%v_3@sUa6&txzCUlX$#YT&{`Z0F_=Wpa)&3Uz3bbzcOskv z$$GpuYoW4vVHte;)Bq5yqf;g)+M{L5Guzhe*iUqzU8Y*r{rsqOBgIwz73zJu$Nv7s zVeb|$w%Giq_;gPtw%m@F<>o*nhvU<3o&#-d_aG*X^+oR+p4svg$A03`l#c@BVieci zCpN8nJnO<=dvx^3#8{pct6YP%*W?Y5NfCH%tFDt_zuY>v2O|+kY{TBbphGS(Wy#_* zMO=DTG0W<|8S4;ji;3l!9sBCNU_*^<1Gju_ob+MntInV#j9Y<|+nkL-D zwznqO=J=L)v7BY42@A-hSvqTzt~OG)cG41DE4D*8Yu!+9IrbSF5Du_&?IkClkA!B- z@MZ0|CNpIoynB93*LrUmY5Ch;uTf!PNxNW31z!5qO~7R|Rb+D1l)DbIb-zp*cewJm4jT$yIESj zIt1x-?s1tcfu(xO*^%i=Lgm_AhP>RZ>o1JA);Se+Ies-^2-unDo_D@3W2A7-%4R2y zK*G5>5tMV?C3aplnSN~8+Ti}60#IiVl*l{bIA5J?T`E{DI7{-qi5GsOumjh`AvMV7 z0yy?BWqxq@JQK|~E3P*4?A|zaPRC~TG5H#8zngEC9glmlGY%FtB=7C-mXt9tMx5W1 zeO;bGG%LgRTPV9;gQB)N*7ok%rTg6VW~X6El^Dj0G#?dXCY|7{3g6D<5{lYJ`}wZs zURs=H_&YhD>1Dov{oQMfo(N0E{P3m=J&T?&+HdI2@8z=( 
z;e!|;HXZ%F@|@SaO6kYL7J`IDkk>xul=#^(Y-Z55h!2z|4lk{MpEJeiR;Yr{TJ&Rp zp^q>1WMw+Sn?^b~r#*G6EZ7-wZH&g~Oe5|$bGDv;#HJs=jN%fXl!dwa#&>4QWyzkX zvt?m3HNvsnmMrj<_5sFzZ5V6BNh!2*zxKumbhAZ0-aAL#_LQ8{IU}e~ zXA8a7Q#Ln1TbcszS9dtOCFHu|5`v`nD-J2qCs;^Wl>n9Qu0m>WAV}k7Yak8?9Zc6u z-iFaJG3sc4OQoRV`@$#hrjlI&c$~#xTj8cee?==zZbiwf)G^4oZy?Um8`tg5{Axzz zv^y?LOUD-!Fpz*VSZbw0kWka1Bs6S@^teiI-NM?bZ0A(CczNi23@M0ZWE^!e*c3~P zS%aR_;yO8~LJuiuI!CL{Xnpb7scZuA?BgRyF}3IKNFpaGt1$&meO{{E27E!qrY#5X z?pO;+oK3Y|T>RE;mD7Ukp_UP+`bwr&MDHoIU6vF^dEI=V)g-EE!jvB-IMdH6ORa zMlJRaVg)j-t$KRz$Lp7#;P$Ao`wH{?h_m<=`-;yh0XOE-_vhs1?FE7j-ouBJ-|s(h z-AC{(HJv=%oriR|=U6sB4cbidSCJ0L;G8EsY;;?Cx3e%;Xm|7g#~l>JeWT};7mHO@ znOg^XRd)@QnYc;kO*995Dz$51A&B_f*DJohjdp7yYGtYnw%Q&53BMGGEKr;okyn!w zt_sa^dRRN!0^~3K?P&i45_>oAQ4iN$qqOQ8iIDf!GnGYAva77BW8Y(pXZ^BMCujqO_=#%p=kWh_wmyJWA}W6N{P}cL{QQc`V}n{Y+<+T&{zYY11~& zqpdc`cwCTe*S@^y-tv|@&10!K#tD3vP}E@Lmkdst$YA3#z+Z=UKs2ck`M()ivIZQ+Qsw`DJ6uuYZt+qpboHr3HQhq zsYkoKK#9E(wPZU1=Q7V7B1}%~+X+vVTUDXA&mv79gh<8QoaIAp0(7y#La80yjBYOA`=piHWs0`d)4$7uGK2$aqR>-N&tN z7U)b{ROqlup=%0(u25|R-6!M&013KZO;y%^p%ubA-}5+GEL2<*KDgkmo<2^J$G;d3K{Aug8GR zs9c8Xf*~IQT)fj(?=)K_V<_#UtO&CIPF~{oGQXZ_Hq^Y|KIRdPM|m&raU*?pvDI@htclHLwhgEzzBnggFa`WT)QRKN*c)i(IzRWlj*h&Ys;?RGqF3Kx1`o)PI<$U zIYnWKv)eyJ`pNzAtbZw2Gy=4Sw~#o2lpB)GL6*-`O)JXQ_5I6_Gd6FIqHT`PKXGmV z!oHRN`u7Jo*!}pydDhbev_tYoQm?qhTw^i-;Q^}+q5<`T1uUarIz^1;QYl`K%Sj+V zqcvb$V-caoCW5F9@<}pSvfdYwukXN3*>~7wpd`BU-l}}V~W|%2#?JOVhg$yznU9E~Q zYG|r{Zyjf7^g3e`Q6J~28@WvU(b4d8(yS9WGi^b9)8oZ@+0Jaq^Rd%fKwB;BLB`PN z!PgjwyQP_XZ=qHAFs4E|2%8BZQd)24t|+NQ9p`ml&_>By-VV(sZg-!shEDj6_wMY$$jp7#L)tD|qA;Uv3)VlqiY*Os>`?#=Bmlh4>Yn?&3P*WsFAseVEy; zFa0`*QSU@FPrN!To4av+vT|183#06$wq?)zO9b~OLb>UZnOuyZ1qv0?OBo-_M%9*i5Oiw#<9|Mm?3z-t`S^UfARLEGJaZyn^K zjB5)s3zH06AFK*CA@q%@9K~z_RX3}E!KqMR+3*sTJb1h8YwLuVgqi7E&;T(ACnI7k ztoCYVpCFr!RP=Qic5??IyNXd7>3axU!?nCYQzKf7EX>XOVy#?kR3sb57AY@M1OqI+ z2NBx}^bcNdU9ferm_cpSml(MYK}gDajakfOxTn?*Wu9BXb$V1cgS8zbgoDHEPe|yi z-z&Pn-NS_VUK+kvpd|i#;CTpW#~5$#Lhqus$M@szH#^|Ito1TEr`aBr+WuxX5_Zz{ 
z_CA@h86)?+HMjU^tf|FJ%K~P0Zfn~t)8OYt)t(bJYXtm>(k2{%uO45-&5IXwz`iuZ zkeYH3((azgMa!{Fe}>Yt^$7?VaItSLrOGfeg1&J?)U+Kn*AtlB$7@)bBxVz}8T{Gl z>=P~c`SRwsE%Dd0eO_NHL11&Fbk2@P+#*_>E9w*f>YIB9H@Y^OmIHfTIGc7sBUn4ZFszFuO>4aQ##_wv(I# z2ZI@b-LTf!aM-YE@a%ey!OC~?RAcz1@Kms-aCJMvE)yeDo5Jwc1zh(uH|h7HB#DVfn5BVJ8Nj>g9)`6Oku<4#r81l*nAq33&HTnHsiQEG9X^YO4g7 zzT?uH!U+~bAqA2OMzYJKBsQ z-gnD*7c4!-Lbn+dXJ1Z=b)5M4Ua@D5Pp#QnN-iw;bYA>$ZGERjv;_xOrI}Y5BS?2c zCzEBFS7NhrzcKxc_%Q}S=28<#+5EByp_4b5ia;L=c?)dJyHI5~eSK`G(_|B&Nzdut zCgE|0Jv#cuwQ@MZy%DKc9EO=P)TNlQTK|o_jfx#5z!+|WLq!nx)31P+N?jbZUaO6X z-e6~$ZFftD*1U$n&x5{7Sydadpq0T>*gX+ehlUXCjG-eM8lKQ+b6?se$wc8@LKR$ zlsQpZX#H4&%DQCiajwOBzx$>s^Nk|VhMleY{3$;0C(BL(L4qF>ChnK=rEEjjN_>V1 z7ZY-9n+#6$_FVW+JQ3|QEBGg`+0bx(B@{GxeM=v<3gQ=YdEytZi#qTT&mU4IP@62a zShIG*3u0R&i+jwIX-4PMtd0wZ8HA3zTD{ulQ=?)4EFOPBZ**@c(28Sa9W6SX8MQDi*P_MV)8njIq`+bDK45X|L^@|*;f zS*8S=8n-y$Da^{&Um;t}dcUBwus8VBUhgeBfGQvIA_0~L^NVuNc;_roY%pe2ZfC^f+BI=V2$)D(sEBSXhCeuz`$bAyoj*lSkI#za z^OPuS1W6^0q|ggOSudpzb}s^oPCphX+_RAaKvio|iLaLxh5b69cVl@Kt&wiM+R7d3w+PvpuSDT#00QY5G*?{?Ie(VqA7j5X4`is;wb zhkJMN4CnlEO5?@qKjcKDx73<3)w}iFYBk3!8g&X-!V{ z2930hnSjsP(IB?#0J(&26V4R24v6i7l_W?ww_WK7L+y&(EYSIY^Xcb+exzhgb&Kx{ zpMA&o&b@O4^T}Yy2Uc<)Tz{{v#IW&RQkg>QVy2Pa>7702KsB>8GSKcWv9ENb2U5}O z&Ao`gK|Q}>i9&P1wkFGv2 z^I%GeW?7*0&>#A4Bb`6Z@TQDvcHiVN#ZadG2Unl@GA z|PbOn|4^0Jl)l)|a&pwjcFz^dbdoX27`4J1bH{>=4 z=(<;Zxxtg@TwF?-k>7@%_H}1n8i%!G${gDTsTSc^1xqy<>U`&Dsa{eWMX?Qnu<{aj zzZ>DbB)httt&y`QuH-*5%X6eL1}}HBP0Ng$VIbfDCz#HhDpdmq4)O0XI49Gwy&$a{ zms~_MeHBcaJ}z-cD}A&pUjAg{;ppJ7D7T-KqvkA?{u84bVnY&0&etwXqjSw?-URE6 zOCuLSJSdDvo;(!De7`?PmG#q;krR0)l&q}@@vHl<@J*f%4|{-gbKg?U)Dj1{wkVd;0e@*=|G(ZSN3JkUrVepCNs^6+l2H-hum?jA;;I zoS!(q{7xDp6;>NOoW?mOp6jcBXk~Oy?70rp^Yl$-^=Lx9?eRjNOngm)@X}|WUH1r^ zq5-`(sYPN0b@Sth9%eh!xD+=n*FNfEMY7Lk)1`^awvz#SzF6NnG=JSM8|)0+yzK2u z-V9F)ab#Z$iX(o^oz0QjlJ05+n4yXGmd6 z=b7gl(>~&0(2LWsKWzG;dAqIyUd)XR4zFhO-bx?HSee7Z27|b1&IUz8k-0 z;r((QvuANj^O3Ok=(BdFy+K`xw8{tM-&Sn6keyNqqB>@lY>A;&-5;j=2GT;awq>C) 
zl~!{>Q_kl}Ls?Yzv&q;cO)hQ&z9?q%%6cc3)hP`y$S;jxNAC>eU?1RNUVQ3I64Q$x<3y5IY}5JrzJ|QcfM+bTN_M5fC#BJ$=@~C7 z@pz1Bfe4qeX!6>ApCW~c$$F1@7n`$1Ser-J7)q?OSR2;6Ml}K-5=#!NEH!rWfOjwV z;k-p8oTSqH52QQ#zGLPq2TxNCckw=>X}sb>El#%v`H@lcPB#&$KOZ09v!Em5d$u%a z=3E%G*rkra#4J+rZGZLD@d=v9c?@z#eXCSxY+m8q^Z1fAq~_h-x9nGtqcEWXb)UEa zkjyc1T8e^ozno~#mNM?W>njZ?Rc>nJWd|RjF$2Or#lPe6*kz_A^g8{qY^M;ruKLBQ zRsgOq1vpTf%5WUvc}`U*uayy)!FD{g5Bd><`dM1#My73(Zj zbAN3@#HKZhY2=)gdYt-U#mwpoNXbaqpO}2Bpyj)9RsAL3k(GE(rtP$%GMc?nZJl}L z#Bwk%kj&IX{7fV&8U+e|7dZw_cno`=0-4sFE@NCSODgjme^1@L=92?5C6W0%3De7# zk%RQuvBy|7lyl_l<4~AgT8X~PI0X6g0EnW6TX9;6UH6YETh-m-U@_G0-lxK3CcEU1Fno^~q1 zIyXDW5KeHO)CC^!p3ZCM2JU5&8I5DHEO^kd)x6AeHE{($ekT{%{5*k6NTFY*JFF)O zb4uP1MHNt>mCDEwpdG>XjK#~APD|%Y_1JFam8-MP+QP>R=wt@-aG7cc3@~hIl`Ijd zHTx3W<;#Txg#S!A4P23Z+4yktU}<{m?Zx;rW5Cz_8tz1ZmyT%MA*3$M$!yiKm^%oy zF;Z1TmU|6CkTSXkmvGojJrbKbBN9&pM|43;O0C)sf{;jh>9au?^L&`cS1iIpAjS3l zsrAN2%~-D$koFjk{Oc*A)Zj;zVuC(KkBZup?wl`v<^@8fNmCMy#p0a74{-O~( z{xeopvM8YsuzJ*8K4gh%E=45Zi{b0WxWaqFKT^qHU6x)sMEQg{=APD~U>E8Oj^(vB zK*R8W0!3n_w@^oXtqXyff{TU_s_ZEE1$_jo0X=pWP)ElHZ3kFU*^uYd1z$(T3E(*3 zaQvh;5ThY=`b~n6N`>$NL1_k+A8c7J=XtBGN*I*oGY@??k}2)pm)O6%vXd!Qw}<0* zi!D{Dfv!cQw7VCZOPI6+BiNhi!b98KoGQ!+u8cXqkPjRWeNnPnuphbu6K?Y?nqeo7 zfssI5gUl)Wu5B~&_Rm}Zq4G+%MSf#e=gi}W1oWr!S6C>xw~vA_J4EQm=!~eb)s7z10!^JN-p5Q%;@(ra_mK$3>Co$#vt|zr>9gGhC|(oGwIk6HPchPxaFbkyt8y(_=>SuGG-r0yrQfz z`+drb_MGMAYXPI+%VX_Hl;Qot9NtOzdRIcxIb%*E8aY@vIUH5=@ELi~85E@5BnZiX zTC7GL5h+KOdU)W>0U}T_+Tlh{#-l2ftA{LspFDoo{p}@?oi|1zxBu((wg?kzxPyDV zo%Llnxz`)7%40SGB)nmd9}*0ihypeSwRxjl~`)8-eu`ucdomZntOu zyl&bnWD{oXioSga{yLEkGC@o;gv#`)?k$EE1*5OtltH*t8VThropySVyo8Tvmq^jM z_iQ_-k9zCS4cxmSgXpfPJsNrxWkIGO(pflcI5tR~ypw9>j3q$`;N+0f(D-^`rqw5uY5B%M&p?5iIbF)PT11}qOp0`9InU$>4H%8BH2U4_dj~dJb14KWAEqP z@$g`G(AwH#oOkO73P+3!^|FA?saf#*1XNl9FoSxkM$cBk4rm7bkT)El<4jJ{o^;%k17g8vLoYtjVPrs!nBs5 z4Y!{SlE$QA&|W92Pgy05q_zn)) zu7bYbi4mh^G0zdMj4)gHD>G1#q@0FbF(M4d8j6NmLJGE(;$q5@u6vm#Z&eD_Ab&Ik zt#j&02`a12zAcMMEM!&;&DoT5Wo3x 
zGVuy6WZ?_INDiBGwJ!v_L(N<_>st`ti2VR1boC6Wmr&ci26ry$?MzZ~C5hnqk+0La{k1 zN!5YfAq%D7$-~nu*E|ZJ7~~F85pg?}oqnzm(IKUsbn0>@#N;nA`_MDlc!uh5BJIo5@*g8f+ZuU3W1ypn zQDnI;yZcNb?x;6GY@k^)w2YN&)V3MW&|9z?_Q2>dS`(57edq^2BDzO~VqAyHA^*}e z5W86lrj2~W&WkBVH4o&lem6*li*L3}KihfKuy8*T;mkpAPs~!8!z6lQ-tv&{5zXln zzI`?^C^7Gc!t@?`4R0g>0%$Y>r|Xdc}mZSUB>*FOQRq3x^*eqIhQ9Y zZvAHbqM%)_|Ch%+8?5BmCBPXuu%`EjYBz=3~y|m;G}0!4AQN3M#Ydy5Na5IvSS<}@e>&;WQDX(&a{ z(uiSi!BLz33Z@k)JuFAxJLh5VcAp?7T^xxvu8yVCXI_U=Lqj17#|5eT*C;4Ye&WiI zzVNe|vT^14wY?k84w%1eN^*-C-oHMuac)cw99?pf++28$x+R;ke_(c0ZX=V8R55L>j=FL}%*mr^%`h;X?At zs_!vm)LG3bmiav{Y#__%_K!g<`#e|^6B?mkWjC_&o-^p^ih46HJuc=_2WFnzd6RbP zzb`G}(YpM4pyXKi^mW_c?5=&<1FRq6#H)vPAUGB z_uG6t$7@5e^d70xQXsf@;}-^5q>Ci+%9WbrSxUdA+#n0&qBvzzq#Png%vdlqgO%9AuZkxU$Prnf9QT zey{iq4LcLw35ho;AdW#ZtA1JS!9o!a0q3NkyX8z5uWj?zD_IPlhteRoQk9*&&EBAi@pvDg$y&b3E&}{${Pi*X z*Xqm6PiB3WV0yGW?9|^9hD&*=m*;l1NHtGDhk4mX(iVW$B{PmiN6y9IToT)~Ss?s# zh)M5+DD}<`ty;#m>WKEsPF?v#ozj;1abqIp%{}nx5HCx&MeL#G{t&~p-5fnI2MSzt z*O(^Ug;HlJ5S5xcSQb0f(W#tye=tU`Byk}cszhCU{bY0 zZbsz}ogS}VWc%$>;=MN=NpCZ_=f*QB=x{pk#n!tuM-3XkkX*dN3~AVS#BhIkx>*9` zZK*87|6s9viKxeVN$J?6`RJo-YFI`SWhWWcJ{$d#ER>B>E6j4yWGzVcp^=&3L8p7N z&_L{Yb0Q#i$&6M&@d(FChHKD6XKnxP10+CYji1N9UuWYDDN&U+{qXIbnmVi!sH$ll zu2TY*SLmB=Xf;K~>SNlT2@f8oG&J&amK|913RR?R`4?5x&Fny=p|pKf)XLe)zrxU| zEnnlaF-~wsxNB8JY^kpfmReo=EG7c~Fsq4SX8<_u@}65CL@3}H>3DkQRJ8< zzD)Gn{i|8Mo1#(nv-X-@nu=nIE;aVs1_iyZv@IiGSQeDr$@Hug>^N>Yc;s9fH80B8W(2@NC*i+w zsDXhuP`M!9^uF5!^A_zsFjfeNqD9<*$;`fQ%Gyf#lUQa{|Kwn(DZtB;Dd~*CB2=Go zzcA|g?&gGIx&0{r+^yINgCqmklQi{Holq4$^;y(eer21EW}0LArf0DI3&rAEBGFm? 
zyX^mr7!S{UMX@f^lkN!qKcQH!jKtY1l()X~Taua#D%FgGP2ccuaOj^^3i#@N_{jth zM?dJ-|L6Vu-KnyFzScae>u@>ffByKNpNyejd*1(#7b*iKR%^|4`o6HL8kJT4NsP#Q zAw&}J)ZDT*rvJ0$n4#c|0oVvqF@X^EPXNml&dFsl27s@4_-GCP_4wHVtU|4384Pod z7p<-*nf!^>ulArn;i#}zlN4Hk=Mx;?L3!;A6S$Q1pVgXx4)w~``~7Q$`|B5btoi~; zlF7bwOaC1ZU>yb)oT0;t+VKBZvhH8ZPRr{g<#g9L@joBx?>6efp{{-X&d+7uvZg;P z<3F(u9PonI!v3!}BotQ(_p0M+G#*5|85bu`)DX$V)N6CA*6Q$bzf3s72iNHDsxcW} z1k5#C@8mMmotUu-8vSXM_+ zuOOVpbT+&^Zl$5N^8OpU2_NXn{}fL^n1RfHDKFagsMxs;fSHLNgtUf`0(YC}(9)8b z{;y&@uN>9f_6@0B++yVt#T+SG#cstsv8cr$Saj|FXhL`$f2-&LGfDuO`sF&X4+Rc9 zgCigwb*<2>XBGLQjscGq5CFA-$8IKHZlb>3*LBY<4>)txfF$CLv$~!Ya-2yn47lI; zTr0V(2;5fvE5)Ml{!L7swBKYN%-5G$&ed%Fo2>#kA8vsb;JkRDURr}C4_gKL7QoXM z$8tU_`8*t{v4qk8C?n!?Dc8d9HV5b~$y446>i=L&fQjDuVCIUD5m?%Pxh^AxC;RJ5 z&l|_fXag))L?iPsKQZ`EtP;J>j#^UzVuoHH`PX=7b=KvGjD0Ui=Cm)S9e@Y&Ze&4t zoJ}b7tv0smPj^>?lp41``|dwrzQ2aR-K(EZfcNmf<1WCz4Bub7;dg0`_y2Ln|NN1^ z7w{z-VnUbyXAk|me0lX(2uZz=KJ&l#=RP5UR}Rz5$@t_4K=3Wa%L(iOLch57#Bw1H zBgz2a4XqkXr&_12`vcX?1+= zwhofR3K-F#j+-{3$p-sb|E)k%=Yra`+XEEV2U8`JEhhRU;gSA8#_?gGF)9YmYa{<2 zm)TaYb7zfnr?me_z(a3;iY zgvYpPSd@QF{~y?ubBsXCp-S5qi?+j=BklUK5DWzy6Q(q(q?1+Is(YQeGB?!e+tVsy z7dAoerY{b3o^u3k)|jrKN|hnq-ip3HA$!)-9`xQ6PnVtcgG$-%HK~$l%x^bfxPtXv zI0txZ;5Yth*@E#LxZKm2#3ul9b@L+zewKb}r0J=#!h{|G8G(jEvdPs6_@kOFyZxNf z;25a?4OeZl*sXYvJl$I^8wAhevgBegxi$?w+tMUuRfw+W9b)qR%1}KtNvH=yvJV|7 zSM~~_ylvWunlp(?GOV(VvPcB3eMp}y{gxB6(%7v*@$b6*XODUXtEgRH8*gz*v6bZS zI{nXOO)|v4JO!>_QY*{rJnZ3eQf{d*&1T=@=J?3vw*jrAMZ-(WOp1D*>atYiJNHEPq1T>4jkjI4=lsG}~n zIM+1{KE=&SGpmS5&*-pRB$SsgoB~8b&&)2rsewU`&B%zt_;= zgsI_`dvA^39I(Euckz|)13IuW(Wlt&8aVE^Zsq|W@X)wJ;A7KZO6h2Nv#Sc(bn5mZo~-(CH-8RkB}OfHSl z6SwdI1{679G6>EOty{H=0v5k8buPmEVzxf(%~*$7i7`MfVbKz>k2`@m>2kN;Y_mQ( z3MkL|nnDO~obl_IeyNmdPc!H+%XsulStiVT>|8uHOYgzt|H|UoXXA3b#GasOX$?8L zaY!tip=MqRH-TSTJQHlvFzT>pXyD_iAY$`8Y@AT>u}gE}`Drb$hHU>)u^SFJjDRm} z#my7AYEcU0m7jMQ!$~4Vs6MW)3=`ny!zLMlu~jr48MGRTF8+$Tx^!Xd>5sw zp<7rA+R4gdrr+8QUxE42SV(%CRl4dciqcnlBRUpEW>y0;4-sGpVWcjI`qBNKOWIN$ 
zWERX~{+oHHVXkjaC$;OOlWs#c?x*17vu>P`3qCA9%p!`7$NBvu;`o(&_k_H)E5k4e$I=w=`-bOH78$oUbx(=Le8TVcz@64y$Nm8r#(T4Sxomj7lu4(zdSRG!rX~1}@-fFmMe32|X&$X9ycmecxt=ID( z!|Iw9y3wr3jXojnkYgpGrkpyQ?ITXv^j)VFep3l&#=SXI7PO2|3PhM=_>EX!LMKLtX+JckM7x4J! z_Yi~`daO}DsNBiMJ-Ml#@H6@G6QPd51~+v`q#y`Hnl2zyNw1K=AzhhUyv-}?g7X&| z?f0R77Q~<@mZ;WtJ)cQf)f)$diKRL^wYNEEn>9b8mW`C5HyOYfHMMiM&p7c7Om3kK zV;?F}rJT@F_2%W2<+L|Ug+I1sjM?p_qBj9M%}313mnn1hlnKO?&_e6VwqvS|8&~WE~xGIEm!e zPmI#pXgWk92TlfNsW4Ck)!^xUip_i?#K_naeC7b{gi+uP6H5%q)Dn2dnJcoTHH8Yb|(k3&To;oMhf z)!>N1V+)-SPaBda$_i2&dSSiuSx#(?O{qVG-vEe~LIz}$uuD#yy_Scwv$M-wR4Edc zr*f(4K3Y+|^-G+Nr~dc>!UTQH7{{(-;~DAf@ihDrS}UH7xKZRer}+=9M&0DptHZ;x z?TudQFh6BG7T{c^gS+6`mrp=sAZTH#!-%1Syxxy(w1=g`bC25FGMQ&Uj6~$K zuNdzuI-PhfWfnokYXQb}Z$v@0BF@a^g3|WGG67&PWstR4*-j+g5c0ZRVw8m!;)cVB zqI17K%?LV=UtoOrRfO!&i_j-x{F#OdF(=7Ond}v2GOjXh)Pp*c45$;h|4~u@3Ky3L zcl@EN48$kmTotM2oTKn7pMvG=`xeADtDomH>q%2n?Dq2fPuJ%Gt|-Nd^o^=N!GLBmBwgM&y%;V}28 znz5Vjrnw&Z6)!O&qRQg2Wpjwgk}b>EY?MkXb%8J&5X>OllS)#zts_KaY?+Q?Spa#qx_vj&*flcwN=guO>9fDb3+ZDk*45Hp~wVkrUzHDU+79gIY>q-MD+nmY@-u`bTkUj#Q@2bNtQ;I z@4mhZ!EZVYK>(hnwSf0#o|K*#|3Nj^lk7JnQL!cxFMDVvj?;91WSghVkLXH0n4V*M!V@_p|lQA9m4LEO$=EkNjZ*#h3B$=)exEJ`VEqX+huT2>D8#b%69JGB z*~o?_x&@I8DlVLyA3k(qArZt27DB`mh`oh6ep0#kH4}dRhruu~CLs=wHMV$E9QbE4 z5Lu;y^C1*2zy^Q5eN&{T3Y*O@rLTCS$cPpTKGY?3ND zd`xiJA<>~Z{iZ&=h(8IA2PlmmU*@6;i zZDPILh8z}7!M(5(BOu`xk2h@F-KMEESi2>kj=#)iUf=kODc9fs26m{YS)!y53eF;* zZwO{>USAuyS7y8M5c6*fU_zV0V`fd7;%7ViKDy3H;sC|7TRyQY=C=GrZP)^KpcNwp zGg>G##GKkhmFUh0BeXxe9oHtB6`f@N_$D_sEI)9CncFg!4>J?89Y&^sSq`6o;@{Ik@`Nj=fNVF#TX(5j1lx6 zHufx%D%70d1N0v8BJ?D@2Px8Pry7)q1uvTp*Qdf9P(q`Rkw?|dG7W--MB2NV_n)XU zDVL~4NbUDX$|0>F-68S)$-Rg)@S$<#S4s9j#wI@K0L9n|guwUdZvnDePa*Mz(7C~! 
z2Lkv4ed!6$K8Oyc2INqUpm!T@6G(Z3_EYD$-(=XH;$FCz1PqU;p%@l4wF69RW{+Mz zc^~P#T5DEu0i^EFAK>mg+m9nG8A=8{+1wB&1bop7f+J>-~6hrtNTsx_T%QZEb`9W=H=S=lx~ST2R!^N z=7N8LF1$KjG&I){w2PpyC_!k!IF)gI%h9xhW9OcoDOsIVES>L=Opx~F=4#>-IU=Wvs-j=i!GqZF{u* z6fYe!hPOR+XPnApKxB;~- zjd{~-{O%4P-N$q53!V#YLO8I|4~?Si;b4=%1{gU9O;3D3_a2@ocERy;mvvSKLZVcs zON{DR%lD1ttmT(~k^h>7bbtd>7dUb&5VD_{S>P6~9Kv9ae$bw@|ymDEuL*Xh~V{S;7OFNZM21 z-27`oGt&*4ugC#Yxq0&zv8hdlm@CDK@G^fOwrx=PhP3H7-wU;AhmTW>7`13$CK*zP4eX(7_?R$`wt4ieGf4E9=;ONP5=TBin-fC6-l&|2#Nh ze~lV75gd*uYhQmeK|q6egE)>y_fF8j3MUg(dzELLINL|Fv+)vaU!Mye<#8A6;4<5~ zgr|+#4}YjuUgkc<<c zp0(B%eT;%D&cjQ+AP%5jN28`FlHUZ>1P{k5yEqOxmOnFDsoyHO&l$H(IwT)Gapc|= zyO7*5z|*}afe$m$YzWW;LuttGGU+PnLVI)2r`e$i^7oYX*p^b#$GebOf?3$-J9ytq zYl&aE`1IZvMbF)3ogW==ITs}_MNFe{hNenXI1Ks5O04-t<+`DK&)+Hf2Hh^bDzV4n_C7r4vqf$YT z^PA!MX^`@g9a4I$VyAqXftGdEP?duyi9Z!b_x3n7X+?HfDD zXRP#Ag^kj;>z;xnl7gT5*$K@! zbd-ltqBV+wPrg0zj$Gm%!xUrRckNP+QR)dYvnSav<8zphgglJ=vJ9I1v)%#XRo71j zf7BSG09(GzvPdFNtBex~^VM zvck2#nqKE8XFdLicv!zMaqM~?xdP9(xJ-LnfaN&12(Pt^{}Wc1OpG*)PdBeY3gRq- z5FKR`q9C-c@9Pt-V(GWpNL^@eiO%k?&QTKAFzfRTz2Co=3oi6P;5zeAVt%Ai+;k=B z=8~U*j*&3oZ<4U!bba4dxO8%H(@cM9O$5aB&tgKHgc=L>yV))V-CScIgz{bw31r7(XqAQ~p>3PN> zAO6wwvbH&Y!<7yPBJFB>YVWn|%i(+;NdcCv0#;GOi!8L_#MRqxWu+ z-{tZh7llaud6koGv%uo}E~_p+UL5&0bQ!E!{$Fts*7^X?k2TCUS6g6%w_OoIdBsVW z)&eoEJ=Jg3|9zJ{h#0{ne-P#AJO5XTJ+23n8;+|l{XRf5%9vo^W;b0atXdl)wRK|O=j z0F&@^a}4(}O{*vYG=E3$NypVA7AWg(zC>`I3CtA2u3}Fx6R10T=P}wu1+%RnRWDv|12?6G37$N$Ef8v%uOeutlmHmNDx(pulagtm%!io9RX!p zYioQPNNFK|D#~r8uqah3J-erxm*@Ok0v@O>V;XDj5jl%le08OJbyv`LZ95hscLST^ z%OKc!GLaLQaS)IlJ{DZ@&`Yvz)wWxtIz%k zico9dP)6%vHv+@jM?9Zyjp6F?S}l&qeqTVk!g~-DnoS zG=C**5gGlN^*vkvqAo)aK2ImeKR_U~anVXTOkZxI&2o%$cPo;Lo_G{ls5Kp3rjGDuEG4Qv`(Ybz~>xX zLj#%Qsyv5>Vv)Zc5@*fGAzDU%MePM8^K1=TbdM<}I^Ro#4Fi+FyR5}qS$4kEv~^EA z+>yXK;U|5*$Tq8;nm++ny0@GTVHiJaZG>4o&yXz#mV(P>B8(a_9a0D8D!U|~);}o= z%J1aAo@Z$xmMLwH%C$wD$?vI%T3&TLewlXjU*wrOCL#488_8gprgXWw$vU!_E(>%; zL2vrjsP%Z^Tb?h-C|rzwip(7XEeNj#7GJDzD}SAo-5=NRE#qYo&h65jn!F`2OcRSD 
zvV6%eL({fd^UC+2R;TYl(D+x=^4Up_F87cqMw*25L2*n(ezqm@`Jo6_z>iFqFBnvN ztY5Xn+0#qWyGvCEqi<>DCtS@MN%M{L#I~$#wsgPO++HV--V;)|N6w%_2W5m|a1J zy_{#zn0^eS7b5j24laSkZvQuKv_Ai1pgN(S@;2DhpCu6? zbQbL!ss8Bl zA>a*Yb?}^X;4J%8aBF4Y)dH{^179WziS&s-GXuF0SKcvQh1<2X@j>TLDVNEO(8|ON zcjYD2oA%1xZcYiiYWT8Bv==O$B))9?w3L~R3^EMiz9)?R(v%v%v(e=)@SNKm5I^bE z_wi&`$YY5`;Bs`xY+8tb>QkpQ?P`3@Y|Xwbz-ctbMfJ$=x|BnY`y`YG=Jizt(FQhr z(!YD>0$+t!HY9s-`j8MCcJc!4YBY;3+dNw)F@a8(>W-c79M;xP^)@q((G>Cnd#rlC zQi_R}jcr_$g^Z`Kc86{*(spJKJO*8duXTDT==OMki3{g82Z=-)tckd3sw;Q*bLh&Z zL$L3q>jh+$ZZC8@yA~!4N2biBtZ#G?7Ul$7kUf)=>*wYE1qiZ+MaVS&owt;l_92_G z5R-u|czroB{aK>Un*EcH?xc!d789>hC(Gm`t_EBR?grLm*sYkE8uL8wS=O$WsNtzl)Oe+%iLORPz#+5Cn z%eEgQS}HQ82$HOG!fp#R=_Wu&G_vA0-p?m&Qw9uAl9yL#pclcR?!0&Db~jgTs)JTuwR10xON9RZ_->$ z=Ch2f2gg~M#7xXs&J$;AFrhI$_*BiEs$5BtGd!n+JvM|qCTAh&Lde7Pb?xPscNUGm zG)>h~ox`%M?!B#}nrNvvQG4cmh$An zGw3FfaWked7P|E}98@GUOx*nU-3X8d;$($SGRsqp;$XNq?!^b34~{^QAwChaJU4>A z1B^(*G`VI%yF_z_w05kWMToGFJ8 zo4D>ylFvVW@Q6J*=0+1Q>A;LUg@J|&pg-PD@bV})PJ6S4$J_7o`RLVv${A>-sSpk$ z*mGa%D<8w5=1$!~G)rmzI!Ke){vwYuc`?p{c4rhJmT%uPpfG0qK)7NQ9aSMCl3!x> z+v?a={o1H)&1bUGxgR(bvgbjgR9+n%rl9El&|n|`)3QS7>pk?$`!>AW1|@h$OdxJL z!PyTV+n%r-6i#txe3l)X#!AS~B;*iG<^0A^XtF48pwTUNS@f$P$nLozn=kY6rF$tn zcl_~++o3aOEw^@gFUk^_|JG{V>SB<9(^MHusy>joh&OZ{M_cWvn7D(=XG7D5&}h~f^>`vv+@ zbE3zQXN)?8eTqz6HDoG_0uOp$f4<7#g$eo#=e)Zg0*eHWL?X%Z*Yx}^8NX)e)y&Yh z?Hi*g&*pIywb}QQ9NlYpV#mb_KMQPY;|!w$d@12;zY+VVM0|~jMe4#aaG*3Mq=Q=4 zGyQ; zxV_{auGcb?m+ebI{WxTU@{{TrSfar0WG)>7=`sqd+8X$am)kA~wW4C*dH4pXIPwQR z3LeVLy&@$g)57gyhRLem6fpclH4>EKZU|W>LIVs#B@!hn^RYfk-{PpJlu}x$VW#6Q z{J|*hK;-wI_+Li1-n}DH78T%CkUBnGUt+W~+8Ny(9zM0hG2Hp_%+}60x^mmz{(MYI zEbQJeuy{4Z#NzTSV|zv{dSX%Cez6kg?g`{?l87jc^?iE9;wQFZ5W;ziPpYPuhyVB} z@s*AvX}HItc3Enm^3sr8^J0{e}9mMB7;zU>x3`h3U0$<9Fj3> zKYG-=%*Zpqt%~3dEhP;h>cxhFe0c_424Ny4Bh8cNZ*z$~kr@NwOIpd(wwKsUgw@Mf z6dtP#W**bue@gwjmWTY(Qu#m5?^Nusz2QFibbrV)o&L*%8p6CXV54I8?F(I2Cu}A20km!@<5&HW;h#N@JmQgw54+7q?s*?5-8Jnpb8T&k~DTi*PGt|)EG*PeN``N 
zJ6hek=Wxf>vabpq6Qo_uRx8}0(>lf+UH>LqRCwe%O5Eh!#BRpv^1wjt;h0BD*Cmtl z+f3^LoVDT#?mYjfPO|D=l(clgG*PJB;lguJ!O4+qwbGHEOfR$Ac6-nszt3Egy*JjB zWmFqEI+T|X!WT%3n{7vLk!J=A{V8`h{0g?IPWokXiG992&cHiJ0<7Ge6n&wC0W9qO zY3sl=v@nzUIAVhJh!M|dm~nLF=0|bZpWpJ1<7_6lIfLzg**ss*7VshKB4FPfO>rgN(f1qu@G89;{tA4{_L(rGSJD>T`(W-|nd!}2pUz8w&~PD|dXtP7RG>(1 zF-tMq5EoX>SIYz`Hv?O$T~7g(9;LonsLwwWi6B34?#;VVHiM~EG|4s!uLzMY>8z~C zkN~yST1b&cz3qZamn}=I_fB;mhIKlzS}ysOo14()>#8Lzl8IhG=zZZtY_Z`Ao6YGC z83Xut{%zX9`uj#iQ1$@}PcB4vGh;0->Cy~DqO-#AcDEVNse{IV1|MSueKis-M=+8D z!SO7uo<~E7-!aVdA34rC6e=QntCq0J2@-M%uIQ9H%^-8EMlqXXZO1TTg{e#;yO1>H z=Ew_v#JDm}&`x?)N|eSBOl#i{U@^1lzTGiU6?gs=J$+0tnzZ*zpgUI zDO$ml5J~V|pcrMBW{fz5WovT*K8Ct9DXybB0JQsYe@gUSJz;cM61tp3!aw?g2?C zoCAeeIz^U46%qDMOm~k(#uP0LQxxRV1fZ^*%ETWK9`Gr_!Rg_2aE`O zT1(1jKp&#Fk_`OtR2;3lJJL0{vlGc@BK|j3Zi zQ?jqsM#x$y2c4Oqaw`RuYidx#-5roDZ$^1}AysANzYZjTT(AD+!+C z--I?3pEysjlT(h@Jm6kzaIG*n2jHfHNp%wiuNnxzEuec9>!k)fr0sZ~HEAR}b5>5L zZEH~(b+g~*fcX!6vmYw(8d5!St44;Kpd?R?xE(==xG<=DWK?hDZs~KB2ANr3?nx4j z{SNjS|2>pQCN>yK0^1!nM$$%8I6AcNAj2nm_v{J*j-W`yqr8qFC*JQCVec7C0?nv8 zL&ZCTTEDnC8aOTQPZhu`^fR<-A|zn&Ag{%reqbDztQTlFusS?_ltxRry4|#j;96XT zRkObhI&cPRlP6@0`Wq_Y2xelyHp$I9A5A7Oj&a!Gi>j}nZF0k@7hZtB-_MPe!knwM z018zUvCxWd&2{Ad+ORZ&RQpCiOan1?<1Kjawv8>md?ux9G?#D{+6prG2w3CWci0Dg z?~)7bkf*jm8#J{U7kivDZH%EGH|Q9N`PvdX*T>4es77v;cS#kjzt%u9j}}U7IgJQ0 zJI?_r19sS>l0a9fYTs|(7t>DK7D)0YmQ_f6Dvuq;43k9*eGHAN)P0ywJXMiw%|jDTecKKa45*(v+rRCM*xGRf?&GO2h%Avl z^eR!$(3*;qj`eCBvCV@YCj1dU!+Za{LGynYu_TT4*o{XMmgo8~s z_e?JQ8zB-XNVa5eq~haHO+|$#P40SXc!^pY{BA?=n2Iftdg%5ouKn#1nBM*QJwNj< zRJ9O)S!!|H?FC*mw zFQ71E@b{bvDZG{agW75a5w2k!%PwYpuw;y7p=i`%p_)M9t|oQwc{RhRYH`>LhsSRq z|LHVv^I!e<{*5f7V#=v!W{l*0zS$4)QS!fBV~b!-uKqRMxAe~^|G675d>#-DVIG~L zL)>JzKQ{WmTDS2tD-kj@aw1407N0>aMK&yjKlY`j*BigrDd*kAdPp>VT{VsvRVzGn;N_HK-XCg44t03Tb zI_u`Npx4ZdS-*Rgh3M5gll1=7<2z3l-pv1pZdk9eDdP zBoL=1n?UT(GNvGJMflhpn~$o7#Fj@j&9IS&=Hit#lV7OE&2E)<9=)G|OxWm_joS$f 
zEY-UUXpj6}W($wTv!!Z@Y+k=wua}4pRn1I)RxklmE$c;#dVg~H{ApFMqN(-L+#eI9ds+e|f2&xW{4~)9ByNj4k9fU|I48sy}C-IM)e*}?4 zO*kl=&Hc9?j48YU6Y>M{00)A8WX&wvR#kBTaawjzZn)n`?dEzsiglG$mOh>_MCy|1n5)mDW zcPLF0*%MGA7MCJ&Yw&E+zZHs z2gE}hc%>CyD+|qE|NYNhe_s>;bjZZ{X_)%IH9owFZI#Q_O8vjd95(WM*W}qwEv7F1)$rTfFdvuT`wC0ZwnzIUdbju|8i- z6)Rni^TYcC`7+^hBkzBV=L^Odfqml1+cUbP#-pqd04WpeFyC-Yd*oeS+X0m z?t&uqQ!G}38{CBYFOg+|9}ouT54Y?De;D+SOa1=nni#l18cy%AdzMJ92V;PY32{+S z?70GjLBm<@*ZZ+R{*PZjKW(75V<`N7`~TUol^6RF@J;Kt?XS;<_fP)>o;@WHciQ+5ozb*ZPtTXj3_PxmlHIMOvW%Duk#iP*Qx{wL)02~bujYa|AF;}J8j-6h;MgW#-w37ssC7T6Y>JeSep6H6 zpV+cR>>&FVtR}C51f5Ba0lp*XeSah#-pa%8`T!vI_yM(Ts5rfPQI&H+rg*YxJTD`? zo`^WEi=>Y?*emwmEMQQ2?NIC0mGySXt{ZFhu5Xg*=4Wx4m>}^(9l$m9H<`+h&$mc3 z{RNca=>jTXE*2e>{xja;dt*X2pp=1&l!{$UM0zse&n-D`hL`@#Cf^2vGrjwDv6keJ zYcHiHqUL4FZ0_mnT|f87;h)TT*%AacE{ilk$CC@=M3FQBwFUyiT7E$SwOT^!^vvli zkdq`3sLS=`8j?u6@vliP9^}(aeLJ;cMv_)KPR4ujnSd%W7d@_oQ1p3#>o+wekq(pq z;xhTsW!@xqZz?JuSHGuTSHjSON3Scu`f|6~UEg+HI-S}GYf%MGk?@gOkrK z{41Z}y4`mBxnFF0DcWvZPyGQaJY5I+P?nUxNb{DKM`yXoVSQ4ZRxe~Fxv%4aTqs;;-tf5w)%zcinQDTA(NCgV%-uU%%eC6Mmf!h~(A@wfd261sdMZ?u zo3-|lll8OOy>6RV)2ejHJyF(E=5aCHkIl4Ts9wj^a%L~K?;4ZdoikBNA~Cy`Hjs=N zLY1gg5p94kesE>Vkyrw?A#uqQ69Z`R59z2X;+rt-2>qukwoNPOEpmd2e=M z0)v77yhX|_x(SV_7Sw4%2`(AFMAO`<11ScZARK}HY2_-PC2v;+DG+kfe9`Om86re~ z=jOWH%YzN0?d9PR^wmySFEi{{?TnIIue`=9rcv64ldrSx8T zAX&Yh54ENmMBI)lgckK(a28-Q!x*hGy zsFp9T)LIc4p-^Y9-R!IPqg|olnv_%}!qR=Kqpnt`p;}M64{7b25$xaw7FP?IJ{FjTK4hm2$* zVz%|f0kpbxn6T~G!m48*xh*4UcD8R2Y+c0vtqp`^<+un>Lq6_&%Q-V6BS2Ox@~ zZ1f;zAokLMksfKBx+)*c^gN=stbN$eh4P^Gz+{|(e3rzj_{;gJ!jqLH256uIkGPJ=uXx@=vLS$VG&7q9PrQOL&B%e<{+BwWXAh##w0_**c8LTz- zU(!?wm7~UQH;zEzM=^GaJWc-73W1#T)y}X?butq$6fA`(QgB4$E2kFKHs1KF z^I$nBaaf8Z4ZZx*m@kx)c^8+vpYT05u~vU&SPt%*YBJmKj0|HlM$h9+rO3I2`?~`H z1xF+}2|2v;i?8AamB+rg7W7-W+m7!n`2%eXL8XQqvn}+6hC=ZdpN#t%5K<79SxF?4 zqDCRB^|O}ykZQ&H)-rs#X*SGmJUGy`s%NW)utEkSeUE_Lq6BCBEZC=cMnHgHm-c#? 
z>X__v_vmg-ryUyu^y-BCR(zVRsKZLB6@-t=0}*PFx@wMeqc$WFK9vnWBn*eWj_kf- z6bG29fMz3o@Yo{46D8$BU_@};iw*V<-1aJ;&tqBv_26jFl2r+LW?d47u6$bKiY|Ai8EYf7OhKv^j|vr9XUm&v;*8L^kr-7Q=82Cz+zWG3k4Hw_?0asA8DCbtg~F3BoW155Hg0u+I)Bneh8edlyLpMh{0!K| zzg=WctjpmGEOz37nTzmuE|WkU$2kes^4itV_UIPF)!2++6c8r#Z%@zs7{Skk^bsK( z;vpBab|(YUo)?A#!O=Yz^evv*iVKa%mE2~sE$i5`XF??Tgjh~J%Sz{-55SBrjbUVG zoAcyWr7bdS<>x$x0u_wWH?@^)>z?~Pe}5MH8fO|$%dPm`emeq9`IkmC&O>v|ghhPt zPJ#?(Pa%%(ZgQ)0H*W|`z4shF2+s)bzWka0E(m8TbRpGC;tyn%d@~978F&Z_Rt;LA zdNG|6@Lrw?+;O<+xaD%xENWtpSm10ABQl$ew(KF^k68T>NSgxW zm2ybag93vXI+^oK!hi(h>PK){pr)!dh0;`t3-L7sV+KERrww34RztT&76+yqzYuxW z*6O)vGq+;CT+yFCBR>irgtU8IMVB(SXjS#uT7o_4MRGE)Mf)}Qf3IG&Y=%Q(uWDZf zwBXi*P9l8f+O!cuDMt{#ZT{4U9K~*B6TsB&0-D~8;i1cv(1iMp(Jv?4wtBb0DqWIj zlXxNUowCIM_qMQ3@MDh~tPQ+X39#sJQW;vUx?OksBM(d>88)4w%Ei4=bx3ho?N4-w zHOLeAeO6@MIKA@<3v!|Y!M zb_Fq!t$cX3Kebtu?zzqGjs9f=8FxHL^la+S#GMQRSZsadg6P3?#M6ODbQ~u8ZC8AR zkL*tj5?;k99bIYn{(T*N0$r42L*H8ToYGugqGrm;lU&j8QD#b>YbzmD8nA9Mfh`uh zJ$8AMQmfFrEC_gP5IQ0g+fSQyYv+RuR0+4JpRlmT@zT9s?oFCiOnYsa!whU| zRn?KCGSwX$`AR~D4$oT184?`V!$Xzn^uRZ`5y5^mNC!zh9@lHh)}^!eZ(->otwS;W z(Z%4~g4lO#Yb#dzMEiKAIHEc$mkq#TC`k;#<2&iacg|>Zi|3vgd_s~b)wP{)bp5=Jm)A(ySCnk}Ja~@k&Fa1R5sKL*~ca?P-+*wt`{+?d^4FD++0YGNBRE8$eZ_wse9bi^59fg7c z){nmcgHD6}nF`2;<>IYJruV34Xh+{eu|Zb(88-D_IQ8Jsv*NVjnCKhSz_Sa5kFZXl z7D~XSs7j+O{$iej5y$RkS$G>HT5GP!pzu}m5A4JPGD32eRPInMUe{7a9JSo$xig&1 zzXH)bj}U}Hl(~N}Qye%fABWEk8misP;OqO5te^%-CK}=i%tuQh&QV4?H64!(I;P zUt383yl(s(NT139m6us!M+H zC8q~iME`RY@Ei;@Zy=u%9A$g*ACRvQz^CLqS|t7}wDtMS3jhoi+n`6C6}>l!|AeDI z5fu=^L;Cm5=EoOG{CEFp9LEB~1*TnRzlb9JnopXFSu6|1PjrFhwu$&@<8J!dnlHSg zXt7&P@*9-Y0Px(NM(giw-Y^M-)Be?z8=u|gQ76rl(p^s=>t#WSS!q#G*JR2UGmzI< zYPGtO-w)zx5fRay!6e=c$ZM~SX63ESR9bBso6pf) zqu+IbuTF^5H0GjPm0#V~0L%2r`bu;x=NU!i4@w07SNYK5 zORE3?GRX)!I#aA%zaRZA91N4R^E zAU;F@HLL3E!uPsUSj^|>tY@q6cqS59qFa|1ecUpU8sRgI53w5S;hpp~%)$ZK_-0No zQuXjS#9lsMVn4)j;nxdE1ptWOvb;M57BI$G?;m^Xvg)$Ip;!kd@f&mufi#lSbmDYw zIbiy$WR2_Fv5#kWUsBXg!effLwOUI6WSCpe?qBZwub-a**y;)MUw`=5PbL9ifFUq9 
zjQJl^d9(0;P&*lw3-*70{_C%AKgSUO7YL|G7X8~J{@;sq22TIyDgVBehyM*j>oO$y zkJbFITm6aKLH~>y|LP_16Ar+6O(9d0|8(u&5B>i3fgk_(^?#niB%rNd+Ne~bB5Ni$ z^;f9. All -complaints will be reviewed and investigated and will result in a response that -is deemed necessary and appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an incident. -Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html - -[homepage]: https://www.contributor-covenant.org - -For answers to common questions about this code of conduct, see -https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index fdc1528..0000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,31 +0,0 @@ -# Contributing to csprng -We want to make contributing to this project as easy and transparent as -possible. - -## Pull Requests -We actively welcome your pull requests. - -1. Fork the repo and create your branch from `master`. -2. If you've added code that should be tested, add tests. -3. If you've changed APIs, update the documentation. -4. Ensure the test suite passes. -5. Make sure your code lints. -6. If you haven't already, complete the Contributor License Agreement ("CLA"). - -## Contributor License Agreement ("CLA") -In order to accept your pull request, we need you to submit a CLA. You only need -to do this once to work on any of Facebook's open source projects. - -Complete your CLA here: - -## Issues -We use GitHub issues to track public bugs. 
Please ensure your description is -clear and has sufficient instructions to be able to reproduce the issue. - -Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe -disclosure of security bugs. In those cases, please go through the process -outlined on that page and do not file a public issue. - -## License -By contributing to csprng, you agree that your contributions will be licensed -under the LICENSE file in the root directory of this source tree. \ No newline at end of file diff --git a/README.md b/README.md index 128e327..c71f0a8 100644 --- a/README.md +++ b/README.md @@ -4,14 +4,14 @@ torchcsprng is a [PyTorch C++/CUDA extension](https://pytorch.org/tutorials/advanced/cpp_extension.html) that provides: -- [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) 128-bit encryption/decryption in two modes: [ECB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) and [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/csprng/blob/master/examples/encrypt_decrypt.ipynb) +- [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) 128-bit encryption/decryption in two modes: [ECB]() and [CTR]() [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/csprng/blob/master/examples/encrypt_decrypt.ipynb) - [cryptographically secure pseudorandom number generators](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) for PyTorch. 
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/csprng/blob/master/examples/csprng.ipynb) ## Design torchcsprng generates a random 128-bit key on CPU using one of its generators and runs -[AES128](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) in [CTR mode](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) - either on CPU or on GPU using CUDA to generate a random 128 bit state and apply a transformation function to map it to target tensor values. +[AES128](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) in [CTR mode]() +either on CPU or on GPU using CUDA to generate a random 128 bit state and apply a transformation function to map it to target tensor values. This approach is based on [Parallel Random Numbers: As Easy as 1, 2, 3(John K. Salmon, Mark A. Moraes, Ron O. Dror, and David E. Shaw, D. E. Shaw Research)](http://www.thesalmons.org/john/random123/papers/random123sc11.pdf). It makes torchcsprng both crypto-secure and parallel on CUDA and CPU. @@ -25,96 +25,95 @@ Advantages: ## Features -torchcsprng 0.2.0 exposes new API for tensor encryption/decryption. Tensor encryption/decryption API is dtype agnostic, so a tensor of any dtype can be encrypted and the result can be stored to a tensor of any dtype. An encryption key also can be a tensor of any dtype. Currently torchcsprng supports [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) cipher with 128-bit key in two modes: [ECB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) and [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)). +torchcsprng 0.2.0 exposes new API for tensor encryption/decryption. Tensor encryption/decryption API is dtype agnostic, so a tensor of any dtype can be encrypted and the result can be stored to a tensor of any dtype. An encryption key also can be a tensor of any dtype. 
Currently torchcsprng supports [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) cipher with 128-bit key in two modes: [ECB]() and [CTR](). -* `torchcsprng.encrypt(input: Tensor, output: Tensor, key: Tensor, cipher: string, mode: string)` +- `torchcsprng.encrypt(input: Tensor, output: Tensor, key: Tensor, cipher: string, mode: string)` > - `input` tensor can be any CPU or CUDA tensor of any dtype and size in bytes(zero-padding is used to make its size in bytes divisible by block size in bytes) > - `output` tensor can have any dtype and the same device as `input` tensor and the size in bytes rounded up to the block size in bytes(16 bytes for AES 128) > - `key` tensor can have any dtype and the same device as `input` tensor and size in bytes equal to 16 for AES 128 > - `cipher` currently can be only one supported value `"aes128"` -> - `mode` currently can be either [`"ecb"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) or [`"ctr"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) +> - `mode` currently can be either [`"ecb"`]() or [`"ctr"`]() -* `torchcsprng.decrypt(input: Tensor, output: Tensor, key: Tensor, cipher: string, mode: string)` +- `torchcsprng.decrypt(input: Tensor, output: Tensor, key: Tensor, cipher: string, mode: string)` > - `input` tensor can be any CPU or CUDA tensor of any dtype with size in bytes divisible by the block size in bytes(16 bytes for AES 128) > - `output` tensor can have any dtype but the same device as `input` tensor and the same size in bytes as `input` tensor > - `key` tensor can have any dtype and the same device as `input` tensor and size in bytes equal to 16 for AES 128 > - `cipher` currently can be only one supported value `"aes128"` -> - `mode` currently can be either [`"ecb"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) or 
[`"ctr"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) +> - `mode` currently can be either [`"ecb"`]() or [`"ctr"`]() torchcsprng exposes two methods to create crypto-secure and non-crypto-secure PRNGs: -| Method to create PRNG | Is crypto-secure? | Has seed? | Underlying implementation | -|----------------------------------------------------|-------------------|-----------|---------------------------| -| create_random_device_generator(token: string=None) | yes | no | See [std::random_device](https://en.cppreference.com/w/cpp/numeric/random/random_device) and [its constructor](https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device). The implementation in libstdc++ expects token to name the source of random bytes. Possible token values include "default", "rand_s", "rdseed", "rdrand", "rdrnd", "/dev/urandom", "/dev/random", "mt19937", and integer string specifying the seed of the mt19937 engine. (Token values other than "default" are only valid for certain targets.) If token=None then constructs a new std::random_device object with an implementation-defined token. | -| create_mt19937_generator(seed: int=None) | no | yes | See [std::mt19937](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine) and [its constructor](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine/mersenne_twister_engine). Constructs a mersenne_twister_engine object, and initializes its internal state sequence to pseudo-random values. If seed=None then seeds the engine with default_seed.| +| Method to create PRNG | Is crypto-secure? | Has seed? 
| Underlying implementation | +| -------------------------------------------------- | ----------------- | --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| create_random_device_generator(token: string=None) | yes | no | See [std::random_device](https://en.cppreference.com/w/cpp/numeric/random/random_device) and [its constructor](https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device). The implementation in libstdc++ expects token to name the source of random bytes. Possible token values include "default", "rand_s", "rdseed", "rdrand", "rdrnd", "/dev/urandom", "/dev/random", "mt19937", and integer string specifying the seed of the mt19937 engine. (Token values other than "default" are only valid for certain targets.) If token=None then constructs a new std::random_device object with an implementation-defined token. | +| create_mt19937_generator(seed: int=None) | no | yes | See [std::mt19937](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine) and [its constructor](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine/mersenne_twister_engine). Constructs a mersenne_twister_engine object, and initializes its internal state sequence to pseudo-random values. If seed=None then seeds the engine with default_seed. 
| The following list of methods supports all forementioned PRNGs: -| Kernel | CUDA | CPU | -|------------------------|------|-----| -| random_() | yes | yes | -| random_(to) | yes | yes | -| random_(from, to) | yes | yes | -| uniform_(from, to) | yes | yes | -| normal_(mean, std) | yes | yes | -| cauchy_(median, sigma) | yes | yes | -| log_normal_(mean, std) | yes | yes | -| geometric_(p) | yes | yes | -| exponential_(lambda) | yes | yes | -| randperm(n) | yes* | yes | - -* the calculations are done on CPU and the result is copied to CUDA +| Kernel | CUDA | CPU | +| ----------------------- | ----- | --- | +| random\_() | yes | yes | +| random\_(to) | yes | yes | +| random\_(from, to) | yes | yes | +| uniform\_(from, to) | yes | yes | +| normal\_(mean, std) | yes | yes | +| cauchy\_(median, sigma) | yes | yes | +| log*normal*(mean, std) | yes | yes | +| geometric\_(p) | yes | yes | +| exponential\_(lambda) | yes | yes | +| randperm(n) | yes\* | yes | + +- the calculations are done on CPU and the result is copied to CUDA ## Installation CSPRNG works with Python 3.6-3.9 on the following operating systems and can be used with PyTorch tensors on the following devices: -| Tensor Device Type | Linux | macOS | MS Window | -|--------------------|-----------|---------------|----------------| -| CPU | Supported | Supported | Supported | +| Tensor Device Type | Linux | macOS | MS Window | +| ------------------ | --------- | ------------- | --------------------- | +| CPU | Supported | Supported | Supported | | CUDA | Supported | Not Supported | Supported since 0.2.0 | The following is the corresponding CSPRNG versions and supported Python versions. 
-| PyTorch | CSPRNG | Python | CUDA | -|---------|--------|----------|------------------| -| 1.8.0 | 0.2.0 | 3.7-3.9 | 10.1, 10.2, 11.1 | -| 1.7.1 | 0.1.4 | 3.6-3.8 | 9.2, 10.1, 10.2 | -| 1.7.0 | 0.1.3 | 3.6-3.8 | 9.2, 10.1, 10.2 | -| 1.6.0 | 0.1.2 | 3.6-3.8 | 9.2, 10.1, 10.2 | - +| PyTorch | CSPRNG | Python | CUDA | +| ------- | ------ | ------- | ---------------- | +| 1.8.0 | 0.2.0 | 3.7-3.9 | 10.1, 10.2, 11.1 | +| 1.7.1 | 0.1.4 | 3.6-3.8 | 9.2, 10.1, 10.2 | +| 1.7.0 | 0.1.3 | 3.6-3.8 | 9.2, 10.1, 10.2 | +| 1.6.0 | 0.1.2 | 3.6-3.8 | 9.2, 10.1, 10.2 | ### Binary Installation Anaconda: | OS | CUDA | | -|---------------|------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ------------- | ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Linux/Windows | 10.1

10.2

11.1

None | conda install torchcsprng cudatoolkit=10.1 -c pytorch -c conda-forge

conda install torchcsprng cudatoolkit=10.2 -c pytorch -c conda-forge

conda install torchcsprng cudatoolkit=11.1 -c pytorch -c conda-forge

conda install torchcsprng cpuonly -c pytorch -c conda-forge | -| macOS | None | conda install torchcsprng -c pytorch | +| macOS | None | conda install torchcsprng -c pytorch | pip: -| OS | CUDA | | -|---------------|------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| OS | CUDA | | +| ------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Linux/Windows | 10.1

10.2

11.1

None | pip install torchcsprng==0.2.0+cu101 torch==1.8.0+cu101 -f https://download.pytorch.org/whl/cu101/torch_stable.html

pip install torchcsprng==0.2.0 torch==1.8.0 -f https://download.pytorch.org/whl/cu102/torch_stable.html

pip install torchcsprng==0.2.0+cu111 torch==1.8.0+cu111 -f https://download.pytorch.org/whl/cu111/torch_stable.html

pip install torchcsprng==0.2.0+cpu torch==1.8.0+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html | -| macOS | None | pip install torchcsprng torch | +| macOS | None | pip install torchcsprng torch | ### Nightly builds: Anaconda: | OS | CUDA | | -|---------------|------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Linux/Windows | 10.1

10.2

11.1

None | conda install torchcsprng cudatoolkit=10.1 -c pytorch-nightly -c conda-forge

conda install torchcsprng cudatoolkit=10.2 -c pytorch-nightly -c conda-forge

conda install torchcsprng cudatoolkit=11.1 -c pytorch-nightly -c conda-forge

conda install torchcsprng cpuonly -c pytorch-nightly -c conda-forge | | macOS | None | conda install torchcsprng -c pytorch-nightly | pip: | OS | CUDA | | -|---------------|------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | Linux/Windows | 10.1

10.2

11.1

None | pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html

pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html

pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cu111/torch_nightly.html

pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html | | macOS | None | pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html | @@ -122,46 +121,57 @@ pip: torchcsprng is a Python C++/CUDA extension that depends on PyTorch. In order to build CSPRNG from source it is required to have Python(>=3.7) with PyTorch(>=1.8.0) installed and C++ compiler(gcc/clang for Linux, XCode for macOS, Visual Studio for MS Windows). To build torchcsprng you can run the following: + ```console python setup.py install ``` + By default, GPU support is built if CUDA is found and torch.cuda.is_available() is True. Additionally, it is possible to force building GPU support by setting the FORCE_CUDA=1 environment variable, which is useful when building a docker image. ## Getting Started The torchcsprng API is available in `torchcsprng` module: + ```python import torch import torchcsprng as csprng ``` + Create crypto-secure PRNG from /dev/urandom: + ```python urandom_gen = csprng.create_random_device_generator('/dev/urandom') ``` Create empty boolean tensor on CUDA and initialize it with random values from urandom_gen: + ```python torch.empty(10, dtype=torch.bool, device='cuda').random_(generator=urandom_gen) ``` + ``` tensor([ True, False, False, True, False, False, False, True, False, False], device='cuda:0') ``` Create empty int16 tensor on CUDA and initialize it with random values in range [0, 100) from urandom_gen: + ```python torch.empty(10, dtype=torch.int16, device='cuda').random_(100, generator=urandom_gen) ``` + ``` tensor([59, 20, 68, 51, 18, 37, 7, 54, 74, 85], device='cuda:0', dtype=torch.int16) ``` Create non-crypto-secure MT19937 PRNG: + ```python mt19937_gen = csprng.create_mt19937_generator() torch.empty(10, dtype=torch.int64, device='cuda').random_(torch.iinfo(torch.int64).min, to=None, generator=mt19937_gen) ``` + ``` tensor([-7584783661268263470, 2477984957619728163, -3472586837228887516, 
-5174704429717287072, 4125764479102447192, -4763846282056057972, @@ -170,37 +180,42 @@ tensor([-7584783661268263470, 2477984957619728163, -3472586837228887516, ``` Create crypto-secure PRNG from default random device: + ```python default_device_gen = csprng.create_random_device_generator() torch.randn(10, device='cuda', generator=default_device_gen) ``` + ``` tensor([ 1.2885, 0.3240, -1.1813, 0.8629, 0.5714, 2.3720, -0.5627, -0.5551, -0.6304, 0.1090], device='cuda:0') ``` Create non-crypto-secure MT19937 PRNG with seed: + ```python mt19937_gen = csprng.create_mt19937_generator(42) torch.empty(10, device='cuda').geometric_(p=0.2, generator=mt19937_gen) ``` + ``` tensor([ 7., 1., 8., 1., 11., 3., 1., 1., 5., 10.], device='cuda:0') ``` Recreate MT19937 PRNG with the same seed: + ```python mt19937_gen = csprng.create_mt19937_generator(42) torch.empty(10, device='cuda').geometric_(p=0.2, generator=mt19937_gen) ``` + ``` tensor([ 7., 1., 8., 1., 11., 3., 1., 1., 5., 10.], device='cuda:0') ``` ## Contributing -We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion. If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. - +We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion. If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. 
## License diff --git a/examples/csprng.ipynb b/examples/csprng.ipynb deleted file mode 100644 index 1f6b477..0000000 --- a/examples/csprng.ipynb +++ /dev/null @@ -1,226 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "csprng.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Lpno_zUJT8ms" - }, - "source": [ - "# Cryptographically secure pseudorandom number generators for PyTorch\n", - "\n", - "The torchcsprng API is available in `torchcsprng` module:\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "db4YYky-PDI_" - }, - "source": [ - "!pip install torchcsprng==0.2.0 torch==1.8.0 -f https://download.pytorch.org/whl/cu101/torch_stable.html" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "O1s_j8CPPHSn" - }, - "source": [ - "import torch\n", - "import torchcsprng as csprng" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "o1Kz25IoS9m-" - }, - "source": [ - "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HLlLxkDIUWCG" - }, - "source": [ - "Create crypto-secure PRNG from /dev/urandom:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "yyyYlq5kUQss" - }, - "source": [ - "urandom_gen = csprng.create_random_device_generator('/dev/urandom')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xbUCnJfkUdUI" - }, - "source": [ - "Create empty boolean tensor on the `device` and initialize it with random values from `urandom_gen`:\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "zmj_VlIzUYIO" - }, - "source": [ - "torch.empty(10, dtype=torch.bool, 
device=device).random_(generator=urandom_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ycODsYhtUud9" - }, - "source": [ - "Create empty int16 tensor on the `device` and initialize it with random values in range [0, 100) from `urandom_gen`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "uel-jbW9UlZH" - }, - "source": [ - "torch.empty(10, dtype=torch.int16, device=device).random_(100, generator=urandom_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1jXW1FEmVMW_" - }, - "source": [ - "Create non-crypto-secure MT19937 PRNG:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "sL-cwFGfVOrp" - }, - "source": [ - "mt19937_gen = csprng.create_mt19937_generator()\n", - "torch.empty(10, dtype=torch.int64, device=device).random_(torch.iinfo(torch.int64).min, to=None, generator=mt19937_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KW96wT4UVXBm" - }, - "source": [ - "Create crypto-secure PRNG from default random device:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "tjwbuE6FVRgm" - }, - "source": [ - "default_device_gen = csprng.create_random_device_generator()\n", - "torch.randn(10, device=device, generator=default_device_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qYgdkZAYVfZT" - }, - "source": [ - "Create non-crypto-secure MT19937 PRNG with seed:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "xjOsYOxxVbzg" - }, - "source": [ - "mt19937_gen = csprng.create_mt19937_generator(42)\n", - "first = torch.empty(10, device=device).geometric_(p=0.2, generator=mt19937_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cV77v7tHVlRd" - }, - "source": [ - "Recreate MT19937 PRNG with the same seed:" - 
] - }, - { - "cell_type": "code", - "metadata": { - "id": "i0O2lC0hVjAg" - }, - "source": [ - "mt19937_gen = csprng.create_mt19937_generator(42)\n", - "second = torch.empty(10, device=device).geometric_(p=0.2, generator=mt19937_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OcgSK0mejcef" - }, - "source": [ - "Check that `first` equals to `second`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "vMx1BRO3jh7L" - }, - "source": [ - "assert (first == second).all()" - ], - "execution_count": null, - "outputs": [] - } - ] -} diff --git a/examples/encrypt_decrypt.ipynb b/examples/encrypt_decrypt.ipynb deleted file mode 100644 index 3de8968..0000000 --- a/examples/encrypt_decrypt.ipynb +++ /dev/null @@ -1,307 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "encrypt_decrypt.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "4JG-7IJgz_dK" - }, - "source": [ - "# PyTorch/CSPRNG encrypt/decrypt examples" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H8TZemj30JvQ" - }, - "source": [ - "torchcsprng 0.2.0 exposes new API for tensor encryption/decryption. Tensor encryption/decryption API is dtype agnostic, so a tensor of any dtype can be encrypted and the result can be stored to a tensor of any dtype. An encryption key also can be a tensor of any dtype. Currently torchcsprng supports AES cipher with 128-bit key in two modes: ECB and CTR." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "jC1O-C25vI0W" - }, - "source": [ - "!pip install torchcsprng==0.2.0 torch==1.8.0 -f https://download.pytorch.org/whl/cu101/torch_stable.html" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "su2RWWdOrWFU" - }, - "source": [ - "import torch\n", - "import torchcsprng as csprng" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "NHTOLPZ_3254" - }, - "source": [ - "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "17L0sgmy0R6o" - }, - "source": [ - "torchcsprng implementation of AES with 128 bit key requires to have a key tensor of 16 bytes but of any dtype" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "rw7WYZ-50To9" - }, - "source": [ - "key = torch.empty(16, dtype=torch.uint8, device=device).random_(0, 256)\n", - "key" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RRfvyfHM4MY1" - }, - "source": [ - "Alternatively it can be a tensor of 8 elements of `torch.int16` or even 4 elements of `torch.float32`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rCy01t1-0dtO" - }, - "source": [ - "The size of input tensor is 42 * (32/8) = 168 bytes. 
AES 128 operates with 16-bytes blocks, so zero-padding of 8 bytes will be used to form 176 bytes(eleven 16-bytes blocks)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "LcuVmhyU0WTn" - }, - "source": [ - "initial = torch.empty(42, dtype=torch.float32, device=device).normal_(-24.0, 42.0)\n", - "initial" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rPNq2u4e3tlJ" - }, - "source": [ - "torchcsprng requires output tensor to be of the same size in bytes as input tensor rounded up to 16 bytes(AES 128 block size), so if `torch.int64` is dtype of the destination tensor size must be 176 / (64/8) = 22" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "RAJya9GT0gb4" - }, - "source": [ - "encrypted = torch.empty(22, dtype=torch.int64, device=device)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-DCI4QOh4oGX" - }, - "source": [ - "Call `torchcsprng.encrypt` to encrypt `initial` tensor in [ECB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) mode with 128-bit `key` tensor and store the result to `encrypted` tensor." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "TK4OjPRq4lsJ" - }, - "source": [ - "csprng.encrypt(initial, encrypted, key, \"aes128\", \"ecb\")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yXUAwFHh5PSy" - }, - "source": [ - "Create an output tensor" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "4LtJ-kD446DJ" - }, - "source": [ - "decrypted = torch.empty_like(initial)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8VcF04mf6Rn5" - }, - "source": [ - "Call `torchcsprng.decrypt` to decrypt `encrypted` tensor in ECB mode with 128-bit `key` tensor and store the result to `decrypted` tensor." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "kojXCFGK5v6l" - }, - "source": [ - "csprng.decrypt(encrypted, decrypted, key, \"aes128\", \"ecb\")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9dEBSPD6EFSu" - }, - "source": [ - "Let's check that `decrypted` equals to `initial`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "yOc1ftnM5yyj" - }, - "source": [ - "assert (decrypted == initial).all()" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cQWyteLlE4mQ" - }, - "source": [ - "Another example is to use [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) mode with 128-bit `key` tensor of 4 elements of dtype `dtype=torch.float32`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ZFInqYawD7ks" - }, - "source": [ - "key = torch.empty(4, dtype=torch.float32, device=device).random_()\n", - "key" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FRz94NaZGyRS" - }, - "source": [ - "Let's encrypt 100 elements `torch.bool` tensor and store the result in 56 elements `torch.int16` tensor:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "8uiqxiehF_is" - }, - "source": [ - "initial = torch.empty(100, dtype=torch.bool, device=device).random_()\n", - "initial" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "G0URlmQYGfcW" - }, - "source": [ - "encrypted = torch.empty(56, dtype=torch.int16, device=device)\n", - "csprng.encrypt(initial, encrypted, key, \"aes128\", \"ctr\")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U9Zz2oXoHw9Q" - }, - "source": [ - "Decrypt it back and check that `decrypted` equals to `initial`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "YXNcdUbXHoPC" - }, - 
"source": [ - "decrypted = torch.empty_like(initial)\n", - "csprng.decrypt(encrypted, decrypted, key, \"aes128\", \"ctr\")\n", - "decrypted" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ie7epw1SKrdQ" - }, - "source": [ - "assert (decrypted == initial).all()" - ], - "execution_count": null, - "outputs": [] - } - ] -} diff --git a/packaging/README.md b/packaging/README.md deleted file mode 100644 index 20ff064..0000000 --- a/packaging/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# Building torchcsprng packages for release - -## Anaconda packages - -### Linux - -```bash -nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/conda-cuda bash -pushd remote/conda - -./build_csprng.sh 9.0 -./build_csprng.sh 10.0 -./build_csprng.sh cpu - -# copy packages over to /remote -# exit docker -# anaconda upload -u pytorch torchcsprng*.bz2 -``` - -### OSX - -```bash -# create a fresh anaconda environment / install and activate it -conda install -y conda-build anaconda-client -./build_csprng.sh cpu - -# copy packages over to /remote -# exit docker -# anaconda upload -u pytorch torchcsprng*.bz2 -``` - -### Windows - -```bash -# Open `Git Bash` and change dir to `conda` -./build_csprng.sh 9.0 -./build_csprng.sh 10.0 -./build_csprng.sh cpu - -# copy packages to a output directory -# anaconda upload -u pytorch torchcsprng*.bz2 -``` - -## Wheels - -### Linux - -pushd wheel - -```bash -nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/manylinux-cuda90:latest bash -cd remote -./linux_manywheel.sh cu90 - -rm -rf /usr/local/cuda* -./linux_manywheel.sh cpu -``` - -```bash -nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/manylinux-cuda100:latest bash -cd remote -./linux_manywheel.sh cu100 -``` - -wheels are in the folders `cpu`, `cu90`, `cu100`. - -You can upload the `cu90` wheels to twine with `twine upload *.whl`. 
-Which wheels we upload depends on which wheels PyTorch uploads as default, and right now, it's `cu90`. - -### OSX - -```bash -pushd wheel -./osx_wheel.sh -``` - -### Windows - -```cmd -set PYTORCH_REPO=pytorch - -pushd windows -call build_csprng.bat 90 0.3.0 1 -call build_csprng.bat 100 0.3.0 1 -call build_csprng.bat cpu 0.3.0 1 -``` - -wheels are in the current folder. - -You can upload them to twine with `twine upload *.whl` diff --git a/packaging/build_conda.sh b/packaging/build_conda.sh deleted file mode 100755 index e0e096d..0000000 --- a/packaging/build_conda.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -ex - -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -. "$script_dir/pkg_helpers.bash" - -export BUILD_TYPE=conda -setup_env $(cat "version.txt" | sed "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/g") -export SOURCE_ROOT_DIR="$PWD" -setup_conda_pytorch_constraint -setup_conda_cudatoolkit_constraint -setup_visual_studio_constraint -setup_junit_results_folder -conda build $CONDA_CHANNEL_FLAGS -c defaults -c conda-forge --no-anaconda-upload --python "$PYTHON_VERSION" packaging/torchcsprng diff --git a/packaging/build_wheel.sh b/packaging/build_wheel.sh deleted file mode 100755 index 15b85a4..0000000 --- a/packaging/build_wheel.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -set -ex - -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -. 
"$script_dir/pkg_helpers.bash" - -export BUILD_TYPE=wheel -setup_env $(cat "version.txt" | sed "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/g") -setup_wheel_python -pip_install numpy pyyaml future ninja -setup_pip_pytorch_version -python setup.py clean - -# Copy binaries to be included in the wheel distribution -if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then - python_exec="$(which python)" - bin_path=$(dirname $python_exec) - env_path=$(dirname $bin_path) - if [[ "$(uname)" == Darwin ]]; then - # Install delocate to relocate the required binaries - pip_install delocate - fi -else - # Install auditwheel to get some inspection utilities - pip_install auditwheel - - # Point to custom libraries - export LD_LIBRARY_PATH=$(pwd)/ext_libraries/lib:$LD_LIBRARY_PATH - export TORCHCSPRNG_INCLUDE=$(pwd)/ext_libraries/include - export TORCHCSPRNG_LIBRARY=$(pwd)/ext_libraries/lib -fi - -if [[ "$OSTYPE" == "msys" ]]; then - IS_WHEEL=1 "$script_dir/windows/internal/vc_env_helper.bat" python setup.py bdist_wheel -else - IS_WHEEL=1 python setup.py bdist_wheel -fi - - -if [[ "$(uname)" == Darwin ]]; then - pushd dist/ - python_exec="$(which python)" - bin_path=$(dirname $python_exec) - env_path=$(dirname $bin_path) - for whl in *.whl; do - DYLD_LIBRARY_PATH="$env_path/lib/:$DYLD_LIBRARY_PATH" delocate-wheel -v $whl - done -else - if [[ "$OSTYPE" == "msys" ]]; then - "$script_dir/windows/internal/vc_env_helper.bat" python $script_dir/wheel/relocate.py - else - LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH" python $script_dir/wheel/relocate.py - fi -fi diff --git a/packaging/conda/build_csprng.sh b/packaging/conda/build_csprng.sh deleted file mode 100755 index 44fc0af..0000000 --- a/packaging/conda/build_csprng.sh +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env bash -if [[ -x "/remote/anaconda_token" ]]; then - . 
/remote/anaconda_token || true -fi - -set -ex - -if [[ "$CIRCLECI" == 'true' ]]; then - export PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin:.:$PATH" -fi - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - -# Parse arguments and determmine version -########################################################### -if [[ -n "$DESIRED_CUDA" && -n "$TORCHCSPRNG_BUILD_VERSION" && -n "$TORCHCSPRNG_BUILD_NUMBER" ]]; then - desired_cuda="$DESIRED_CUDA" - build_version="$PYTORCH_BUILD_VERSION" - build_number="$PYTORCH_BUILD_NUMBER" -else - if [ "$#" -ne 3 ]; then - echo "Illegal number of parameters. Pass cuda version, pytorch version, build number" - echo "CUDA version should be Mm with no dot, e.g. '80'" - echo "DESIRED_PYTHON should be M.m, e.g. '2.7'" - exit 1 - fi - - desired_cuda="$1" - build_version="$2" - build_number="$3" -fi -if [[ "$desired_cuda" != cpu ]]; then - desired_cuda="$(echo $desired_cuda | tr -d cuda. 
)" -fi -echo "Building cuda version $desired_cuda and torchcsprng version: $build_version build_number: $build_number" - -if [[ "$desired_cuda" == 'cpu' ]]; then - cpu_only=1 - cuver="cpu" -else - # Switch desired_cuda to be M.m to be consistent with other scripts in - # pytorch/builder - export FORCE_CUDA=1 - cuda_nodot="$desired_cuda" - - if [[ ${#cuda_nodot} -eq 2 ]]; then - desired_cuda="${desired_cuda:0:1}.${desired_cuda:1:1}" - elif [[ ${#cuda_nodot} -eq 3 ]]; then - desired_cuda="${desired_cuda:0:2}.${desired_cuda:2:1}" - else - echo "unknown cuda version $cuda_nodot" - exit 1 - fi - - cuver="cu$cuda_nodot" -fi - -export TORCHCSPRNG_BUILD_VERSION=$build_version -export TORCHCSPRNG_BUILD_NUMBER=$build_number - -if [[ -z "$DESIRED_PYTHON" ]]; then - DESIRED_PYTHON=('3.5' '3.6' '3.7') -fi - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" - -if [[ -z "$WIN_PACKAGE_WORK_DIR" ]]; then - WIN_PACKAGE_WORK_DIR="$(echo $(pwd -W) | tr '/' '\\')\\tmp_conda_$(date +%H%M%S)" -fi - -mkdir -p "$WIN_PACKAGE_WORK_DIR" || true -csprng_rootdir="$(realpath ${WIN_PACKAGE_WORK_DIR})/torchcsprng-src" -git config --system core.longpaths true - -if [[ ! 
-d "$csprng_rootdir" ]]; then - rm -rf "$csprng_rootdir" - git clone "https://github.com/pytorch/csprng" "$csprng_rootdir" - pushd "$csprng_rootdir" - git checkout $PYTORCH_BRANCH - popd -fi - -cd "$SOURCE_DIR" - -export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda" -export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe" -rm -rf "$tmp_conda" -rm -f "$miniconda_exe" -curl -sSk https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "$miniconda_exe" -"$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe" -pushd $tmp_conda -export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH" -popd -retry conda install -yq conda-build - -ANACONDA_USER=pytorch-nightly -conda config --set anaconda_upload no - - -export TORCHCSPRNG_PACKAGE_SUFFIX="" -if [[ "$desired_cuda" == 'cpu' ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CONDA_CPUONLY_FEATURE="- cpuonly # [not osx]" - export CUDA_VERSION="None" -else - export CONDA_CPUONLY_FEATURE="" - . 
./switch_cuda_version.sh $desired_cuda - if [[ "$desired_cuda" == "10.2" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.2,<10.3 # [not osx]" - elif [[ "$desired_cuda" == "10.1" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" - elif [[ "$desired_cuda" == "10.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]" - elif [[ "$desired_cuda" == "9.2" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # [not osx]" - elif [[ "$desired_cuda" == "9.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.0,<9.1 # [not osx]" - elif [[ "$desired_cuda" == "8.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=8.0,<8.1 # [not osx]" - else - echo "unhandled desired_cuda: $desired_cuda" - exit 1 - fi -fi - -if [[ -z "$PYTORCH_VERSION" ]]; then - export CONDA_CHANNEL_FLAGS="-c pytorch-nightly -c pytorch" - export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ - python -c "import os, sys, json, re; cuver = '$cuver'; \ - cuver = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - print(re.sub(r'\\+.*$', '', \ - [x['version'] for x in json.load(sys.stdin)['pytorch'] \ - if (x['platform'] == 'darwin' or cuver in x['fn']) \ - and 'py' + os.environ['DESIRED_PYTHON'] in x['fn']][-1]))")" - if [[ -z "$PYTORCH_VERSION" ]]; then - echo "PyTorch version auto detection failed" - echo "No package found for desired_cuda=$desired_cuda and DESIRED_PYTHON=$DESIRED_PYTHON" - exit 1 - fi -else - export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly" -fi -if [[ "$desired_cuda" == 'cpu' ]]; then - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION" -else - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" -fi - -# Loop through all Python versions to 
build a package for each -for py_ver in "${DESIRED_PYTHON[@]}"; do - build_string="py${py_ver}_${build_string_suffix}" - folder_tag="${build_string}_$(date +'%Y%m%d')" - - # Create the conda package into this temporary folder. This is so we can find - # the package afterwards, as there's no easy way to extract the final filename - # from conda-build - output_folder="out_$folder_tag" - rm -rf "$output_folder" - mkdir "$output_folder" - - if [[ "$py_ver" == 3.5 ]]; then - export CONDA_TYPING_CONSTRAINT="- typing" - else - export CONDA_TYPING_CONSTRAINT="" - fi - - export VSTOOLCHAIN_PACKAGE=vs2017 - - # We need to build the compiler activation scripts first on Windows - time VSDEVCMD_ARGS=${VSDEVCMD_ARGS[@]} \ - conda build -c "$ANACONDA_USER" \ - --no-anaconda-upload \ - --output-folder "$output_folder" \ - ../$VSTOOLCHAIN_PACKAGE - - cp ../$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml ../torchcsprng/conda_build_config.yaml - - conda config --set anaconda_upload no - echo "Calling conda-build at $(date)" - if [[ "$desired_cuda" == "9.2" ]]; then - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ - BUILD_VERSION="$TORCHCSPRNG_BUILD_VERSION" \ - CU_VERSION="$cuver" \ - SOURCE_ROOT_DIR="$csprng_rootdir" \ - conda build -c "$ANACONDA_USER" \ - -c defaults \ - -c conda-forge \ - -c "numba/label/dev" \ - --no-anaconda-upload \ - --python "$py_ver" \ - --output-folder "$output_folder" \ - --no-verify \ - --no-test \ - ../torchcsprng - else - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ - BUILD_VERSION="$TORCHCSPRNG_BUILD_VERSION" \ - CU_VERSION="$cuver" \ - SOURCE_ROOT_DIR="$csprng_rootdir" \ - conda build -c "$ANACONDA_USER" \ - -c defaults \ - -c conda-forge \ - --no-anaconda-upload \ - --python "$py_ver" \ - --output-folder "$output_folder" \ - --no-verify \ - --no-test \ - ../torchcsprng - fi - echo "Finished conda-build at $(date)" - - # Extract the package for testing - ls -lah "$output_folder" - built_package="$(find $output_folder/ -name '*torchcsprng*.tar.bz2')" - - # Copy the built 
package to the host machine for persistence before testing - if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then - mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true - cp "$built_package" "$PYTORCH_FINAL_PACKAGE_DIR/" - fi -done - - -set +e diff --git a/packaging/conda/install_conda.bat b/packaging/conda/install_conda.bat deleted file mode 100644 index 6052ad0..0000000 --- a/packaging/conda/install_conda.bat +++ /dev/null @@ -1 +0,0 @@ -start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% diff --git a/packaging/conda/switch_cuda_version.sh b/packaging/conda/switch_cuda_version.sh deleted file mode 100755 index 342def9..0000000 --- a/packaging/conda/switch_cuda_version.sh +++ /dev/null @@ -1,28 +0,0 @@ -if [[ "$OSTYPE" == "msys" ]]; then - CUDA_DIR="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v$1" -else - CUDA_DIR="/usr/local/cuda-$1" -fi - -if ! ls "$CUDA_DIR" -then - echo "folder $CUDA_DIR not found to switch" -fi - -echo "Switching symlink to $CUDA_DIR" -mkdir -p /usr/local -rm -fr /usr/local/cuda -ln -s "$CUDA_DIR" /usr/local/cuda - -if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_VERSION=`ls /usr/local/cuda/bin/cudart64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` - export CUDNN_VERSION=`ls /usr/local/cuda/bin/cudnn64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` -else - export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) - export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." 
-f -3 | rev) -fi - -ls -alh /usr/local/cuda - -echo "CUDA_VERSION=$CUDA_VERSION" -echo "CUDNN_VERSION=$CUDNN_VERSION" diff --git a/packaging/pkg_helpers.bash b/packaging/pkg_helpers.bash deleted file mode 100644 index dad9622..0000000 --- a/packaging/pkg_helpers.bash +++ /dev/null @@ -1,382 +0,0 @@ -# A set of useful bash functions for common functionality we need to do in -# many build scripts - - -# Setup CUDA environment variables, based on CU_VERSION -# -# Inputs: -# CU_VERSION (cpu, cu92, cu100) -# NO_CUDA_PACKAGE (bool) -# BUILD_TYPE (conda, wheel) -# -# Outputs: -# VERSION_SUFFIX (e.g., "") -# PYTORCH_VERSION_SUFFIX (e.g., +cpu) -# WHEEL_DIR (e.g., cu100/) -# CUDA_HOME (e.g., /usr/local/cuda-9.2, respected by torch.utils.cpp_extension) -# FORCE_CUDA (respected by torchcsprng setup.py) -# NVCC_FLAGS (respected by torchcsprng setup.py) -# -# Precondition: CUDA versions are installed in their conventional locations in -# /usr/local/cuda-* -# -# NOTE: Why VERSION_SUFFIX versus PYTORCH_VERSION_SUFFIX? If you're building -# a package with CUDA on a platform we support CUDA on, VERSION_SUFFIX == -# PYTORCH_VERSION_SUFFIX and everyone is happy. However, if you are building a -# package with only CPU bits (e.g., torchaudio), then VERSION_SUFFIX is always -# empty, but PYTORCH_VERSION_SUFFIX is +cpu (because that's how you get a CPU -# version of a Python package. But that doesn't apply if you're on OS X, -# since the default CU_VERSION on OS X is cpu. -setup_cuda() { - - # First, compute version suffixes. 
By default, assume no version suffixes - export VERSION_SUFFIX="" - export PYTORCH_VERSION_SUFFIX="" - export WHEEL_DIR="" - # Wheel builds need suffixes (but not if they're on OS X, which never has suffix) - if [[ "$BUILD_TYPE" == "wheel" ]] && [[ "$(uname)" != Darwin ]]; then - # The default CUDA has no suffix - if [[ "$CU_VERSION" != "cu102" ]]; then - export PYTORCH_VERSION_SUFFIX="+$CU_VERSION" - fi - # Match the suffix scheme of pytorch, unless this package does not have - # CUDA builds (in which case, use default) - if [[ -z "$NO_CUDA_PACKAGE" ]]; then - export VERSION_SUFFIX="$PYTORCH_VERSION_SUFFIX" - export WHEEL_DIR="$CU_VERSION/" - fi - fi - - # Now work out the CUDA settings - case "$CU_VERSION" in - cu112) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2" - else - export CUDA_HOME=/usr/local/cuda-11.2/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu111) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1" - else - export CUDA_HOME=/usr/local/cuda-11.1/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - 
cu110) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.0" - else - export CUDA_HOME=/usr/local/cuda-11.0/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_50,code=compute_50" - ;; - cu102) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" - else - export CUDA_HOME=/usr/local/cuda-10.2/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu101) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.1" - else - export CUDA_HOME=/usr/local/cuda-10.1/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu100) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.0" - else - export CUDA_HOME=/usr/local/cuda-10.0/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # 
https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu92) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.2" - else - export CUDA_HOME=/usr/local/cuda-9.2/ - fi - export FORCE_CUDA=1 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50" - ;; - cpu) - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac -} - -# Populate build version if necessary, and add version suffix -# -# Inputs: -# BUILD_VERSION (e.g., 0.2.0 or empty) -# VERSION_SUFFIX (e.g., +cpu) -# -# Outputs: -# BUILD_VERSION (e.g., 0.2.0.dev20190807+cpu) -# -# Fill BUILD_VERSION if it doesn't exist already with a nightly string -# Usage: setup_build_version 0.2.0 -setup_build_version() { - if [[ -z "$BUILD_VERSION" ]]; then - export BUILD_VERSION="$1.dev$(date "+%Y%m%d")$VERSION_SUFFIX" - else - export BUILD_VERSION="$BUILD_VERSION$VERSION_SUFFIX" - fi - - # Set build version based on tag if on tag - if [[ -n "${CIRCLE_TAG}" ]]; then - # Strip tag - export BUILD_VERSION="$(echo "${CIRCLE_TAG}" | sed -e 's/^v//' -e 's/-.*$//')${VERSION_SUFFIX}" - fi -} - -# Set some useful variables for OS X, if applicable -setup_macos() { - if [[ "$(uname)" == Darwin ]]; then - export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ - fi -} - -# set variable to determine whether the typing library needs to be built in -setup_typing() { - if [[ "$PYTHON_VERSION" == 3.5 ]]; then - export CONDA_TYPING_CONSTRAINT="- typing" - else - export CONDA_TYPING_CONSTRAINT="" - fi -} - -# Top-level entry point for things every package will need to do -# 
-# Usage: setup_env 0.2.0 -setup_env() { - setup_cuda - setup_build_version "$1" - setup_macos - setup_typing -} - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - -# Inputs: -# PYTHON_VERSION (2.7, 3.5, 3.6, 3.7) -# UNICODE_ABI (bool) -# -# Outputs: -# PATH modified to put correct Python version in PATH -# -# Precondition: If Linux, you are in a soumith/manylinux-cuda* Docker image -setup_wheel_python() { - if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then - eval "$(conda shell.bash hook)" - conda env remove -n "env$PYTHON_VERSION" || true - conda create -yn "env$PYTHON_VERSION" python="$PYTHON_VERSION" - conda activate "env$PYTHON_VERSION" - else - case "$PYTHON_VERSION" in - 2.7) - if [[ -n "$UNICODE_ABI" ]]; then - python_abi=cp27-cp27mu - else - python_abi=cp27-cp27m - fi - ;; - 3.5) python_abi=cp35-cp35m ;; - 3.6) python_abi=cp36-cp36m ;; - 3.7) python_abi=cp37-cp37m ;; - 3.8) python_abi=cp38-cp38 ;; - 3.9) python_abi=cp39-cp39 ;; - *) - echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION" - exit 1 - ;; - esac - # Download all the dependencies required to compile image and video_reader - # extensions - - mkdir -p ext_libraries - pushd ext_libraries - popd - export PATH="/opt/python/$python_abi/bin:$(pwd)/ext_libraries/bin:$PATH" - fi -} - -# Install with pip a bit more robustly than the default -pip_install() { - retry pip install --progress-bar off "$@" -} - -# Install torch with pip, respecting PYTORCH_VERSION, and record the installed -# version into PYTORCH_VERSION, if applicable -setup_pip_pytorch_version() { - if [[ -z "$PYTORCH_VERSION" ]]; then - # Install latest prerelease version of torch, per our nightlies, consistent - # with the requested cuda version - pip_install --pre torch -f "https://download.pytorch.org/whl/nightly/${WHEEL_DIR}torch_nightly.html" - if [[ "$CUDA_VERSION" == "cpu" ]]; then - # CUDA and CPU are 
ABI compatible on the CPU-only parts, so strip - # in this case - export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" - else - export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version: *//')" - fi - else - pip_install "torch==$PYTORCH_VERSION$PYTORCH_VERSION_SUFFIX" \ - -f "https://download.pytorch.org/whl/${CU_VERSION}/torch_stable.html" \ - -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/${CU_VERSION}/torch_${UPLOAD_CHANNEL}.html" - fi -} - -# Fill PYTORCH_VERSION with the latest conda nightly version, and -# CONDA_CHANNEL_FLAGS with appropriate flags to retrieve these versions -# -# You MUST have populated PYTORCH_VERSION_SUFFIX before hand. -setup_conda_pytorch_constraint() { - if [[ -z "$PYTORCH_VERSION" ]]; then - export CONDA_CHANNEL_FLAGS="-c pytorch-nightly -c pytorch" - export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ - python -c "import os, sys, json, re; cuver = os.environ.get('CU_VERSION'); \ - cuver_1 = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - cuver_2 = (cuver[:-1] + '.' 
+ cuver[-1]).replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - print(re.sub(r'\\+.*$', '', \ - [x['version'] for x in json.load(sys.stdin)['pytorch'] \ - if (x['platform'] == 'darwin' or cuver_1 in x['fn'] or cuver_2 in x['fn']) \ - and 'py' + os.environ['PYTHON_VERSION'] in x['fn']][-1]))")" - if [[ -z "$PYTORCH_VERSION" ]]; then - echo "PyTorch version auto detection failed" - echo "No package found for CU_VERSION=$CU_VERSION and PYTHON_VERSION=$PYTHON_VERSION" - exit 1 - fi - else - export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-${UPLOAD_CHANNEL}" - fi - if [[ "$CU_VERSION" == cpu ]]; then - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION${PYTORCH_VERSION_SUFFIX}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION" - else - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}" - fi - if [[ "$OSTYPE" == msys && "$CU_VERSION" == cu92 ]]; then - export CONDA_CHANNEL_FLAGS="${CONDA_CHANNEL_FLAGS} -c defaults -c numba/label/dev" - fi -} - -# Translate CUDA_VERSION into CUDA_CUDATOOLKIT_CONSTRAINT -setup_conda_cudatoolkit_constraint() { - export CONDA_CPUONLY_FEATURE="" - if [[ "$(uname)" == Darwin ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="" - else - case "$CU_VERSION" in - cu112) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.2,<11.3 # [not osx]" - ;; - cu111) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.1,<11.2 # [not osx]" - ;; - cu110) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.0,<11.1 # [not osx]" - ;; - cu102) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.2,<10.3 # [not osx]" - ;; - cu101) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" - ;; - cu100) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]" - ;; - cu92) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # 
[not osx]" - ;; - cpu) - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CONDA_CPUONLY_FEATURE="- cpuonly" - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac - fi -} - -setup_conda_cudatoolkit_plain_constraint() { - export CONDA_CPUONLY_FEATURE="" - export CMAKE_USE_CUDA=1 - if [[ "$(uname)" == Darwin ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CMAKE_USE_CUDA=0 - else - case "$CU_VERSION" in - cu102) - export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.2" - ;; - cu101) - export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.1" - ;; - cu100) - export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.0" - ;; - cu92) - export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=9.2" - ;; - cpu) - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CONDA_CPUONLY_FEATURE="cpuonly" - export CMAKE_USE_CUDA=0 - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac - fi -} - -# Build the proper compiler package before building the final package -setup_visual_studio_constraint() { - if [[ "$OSTYPE" == "msys" ]]; then - export VSTOOLCHAIN_PACKAGE=vs$VC_YEAR - conda build $CONDA_CHANNEL_FLAGS --no-anaconda-upload packaging/$VSTOOLCHAIN_PACKAGE - cp packaging/$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml packaging/torchcsprng/conda_build_config.yaml - fi -} - -setup_junit_results_folder() { - if [[ "$CI" == "true" ]]; then - export CONDA_PYTORCH_BUILD_RESULTS_DIRECTORY="${SOURCE_ROOT_DIR}/build_results/results.xml" - fi -} diff --git a/packaging/torchcsprng/bld.bat b/packaging/torchcsprng/bld.bat deleted file mode 100644 index 8c7c833..0000000 --- a/packaging/torchcsprng/bld.bat +++ /dev/null @@ -1,27 +0,0 @@ -@echo on - -set TORCHCSPRNG_BUILD_VERSION=%PKG_VERSION% -set TORCHCSPRNG_BUILD_NUMBER=%PKG_BUILDNUM% - -set build_with_cuda= - -if "%CUDA_VERSION%" == "None" goto cuda_flags_end -if "%CUDA_VERSION%" == "cpu" goto cuda_flags_end -if "%CUDA_VERSION%" == "" goto cuda_flags_end - -set build_with_cuda=1 -set 
desired_cuda=%CUDA_VERSION:~0,-1%.%CUDA_VERSION:~-1,1% - -set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda% -set CUDA_BIN_PATH=%CUDA_PATH%\bin -set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -if "%desired_cuda%" == "9.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "9.2" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "10.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "10.1" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "10.2" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - -:cuda_flags_end - -python setup.py install --single-version-externally-managed --record=record.txt -if errorlevel 1 exit /b 1 diff --git a/packaging/torchcsprng/conda_build_config.yaml b/packaging/torchcsprng/conda_build_config.yaml deleted file mode 100644 index 257515c..0000000 --- a/packaging/torchcsprng/conda_build_config.yaml +++ /dev/null @@ -1,26 +0,0 @@ -channel_sources: - - 
pytorch-nightly,pytorch,defaults -blas_impl: - - mkl # [x86_64] -c_compiler: - - vs2017 # [win] -cxx_compiler: - - vs2017 # [win] -python: - - 3.5 - - 3.6 -# This differs from target_platform in that it determines what subdir the compiler -# will target, not what subdir the compiler package will be itself. -# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 -# code on win-64 miniconda. -cross_compiler_target_platform: - - win-64 # [win] -target_platform: - - win-64 # [win] -vc: - - 14 -zip_keys: - - # [win] - - vc # [win] - - c_compiler # [win] - - cxx_compiler # [win] diff --git a/packaging/torchcsprng/meta.yaml b/packaging/torchcsprng/meta.yaml deleted file mode 100644 index 1b4570d..0000000 --- a/packaging/torchcsprng/meta.yaml +++ /dev/null @@ -1,56 +0,0 @@ -package: - name: torchcsprng - version: "{{ environ.get('BUILD_VERSION') }}" - -source: - path: "{{ environ.get('SOURCE_ROOT_DIR') }}" - -requirements: - build: - - {{ compiler('c') }} # [win] - - {{ compiler('cxx') }} - - host: - - python - - setuptools - {{ environ.get('CONDA_PYTORCH_BUILD_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} - {{ environ.get('CONDA_CPUONLY_FEATURE') }} - - run: - - python - - pillow >=4.1.1 - - numpy >=1.11 - {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} - -build: - string: py{{py}}_{{ environ['CU_VERSION'] }} - script: python setup.py install --single-version-externally-managed --record=record.txt # [not win] - script_env: - - CU_VERSION - - CUDA_HOME - - FORCE_CUDA - - NVCC_FLAGS - - BUILD_VERSION - features: - {{ environ.get('CONDA_CPUONLY_FEATURE') }} - -#test: -# imports: -# - torch -# - torchcsprng -# source_files: -# - test -# requires: -# - pytest -# - scipy -# - pycrypto -# commands: -# pytest . 
--verbose - -about: - home: https://github.com/pytorch/csprng - license: BSD - license_file: LICENSE - summary: 'Cryptographically secure pseudorandom number generators for PyTorch' diff --git a/packaging/vs2017/activate.bat b/packaging/vs2017/activate.bat deleted file mode 100644 index ccecfc2..0000000 --- a/packaging/vs2017/activate.bat +++ /dev/null @@ -1,44 +0,0 @@ -:: Set env vars that tell distutils to use the compiler that we put on path -SET DISTUTILS_USE_SDK=1 -SET MSSdk=1 - -SET "VS_VERSION=15.0" -SET "VS_MAJOR=15" -SET "VS_YEAR=2017" - -set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out" -set "MSYS2_ENV_CONV_EXCL=CL" - -:: For Python 3.5+, ensure that we link with the dynamic runtime. See -:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info -set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VSINSTALLDIR=%%i\" - goto :vswhere - ) -) - -:vswhere - -:: Shorten PATH to avoid the `input line too long` error. -SET MyPath=%PATH% - -setlocal EnableDelayedExpansion - -SET TempPath="%MyPath:;=";"%" -SET var= -FOR %%a IN (%TempPath%) DO ( - IF EXIST %%~sa ( - SET "var=!var!;%%~sa" - ) -) - -set "TempPath=!var:~1!" -endlocal & set "PATH=%TempPath%" - -:: Shorten current directory too -FOR %%A IN (.) 
DO CD "%%~sA" - -:: other things added by install_activate.bat at package build time diff --git a/packaging/vs2017/conda_build_config.yaml b/packaging/vs2017/conda_build_config.yaml deleted file mode 100644 index 5188bb0..0000000 --- a/packaging/vs2017/conda_build_config.yaml +++ /dev/null @@ -1,24 +0,0 @@ -blas_impl: - - mkl # [x86_64] -c_compiler: - - vs2017 # [win] -cxx_compiler: - - vs2017 # [win] -python: - - 3.5 - - 3.6 -# This differs from target_platform in that it determines what subdir the compiler -# will target, not what subdir the compiler package will be itself. -# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 -# code on win-64 miniconda. -cross_compiler_target_platform: - - win-64 # [win] -target_platform: - - win-64 # [win] -vc: - - 14 -zip_keys: - - # [win] - - vc # [win] - - c_compiler # [win] - - cxx_compiler # [win] diff --git a/packaging/vs2017/install_activate.bat b/packaging/vs2017/install_activate.bat deleted file mode 100644 index de0e6ff..0000000 --- a/packaging/vs2017/install_activate.bat +++ /dev/null @@ -1,30 +0,0 @@ -set YEAR=2017 -set VER=15 - -mkdir "%PREFIX%\etc\conda\activate.d" -COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - -IF "%cross_compiler_target_platform%" == "win-64" ( - set "target_platform=amd64" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - IF "%VSDEVCMD_ARGS%" == "" ( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) ELSE 
( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) else ( - set "target_platform=x86" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd - ) - diff --git a/packaging/vs2017/install_runtime.bat b/packaging/vs2017/install_runtime.bat deleted file mode 100644 index 5163c16..0000000 --- a/packaging/vs2017/install_runtime.bat +++ /dev/null @@ -1,49 +0,0 @@ -set VC_PATH=x86 -if "%ARCH%"=="64" ( - set VC_PATH=x64 -) - -set MSC_VER=2017 - -rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015 -rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO ( -rem set SP=%%A -rem ) - -rem if not "%SP%" == "%PKG_VERSION%" ( -rem echo "Version detected from registry: %SP%" -rem echo "does not match version of package being built (%PKG_VERSION%)" -rem echo "Do you have current updates for VS 2015 installed?" -rem exit 1 -rem ) - - -REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below! 
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E -robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E -if %ERRORLEVEL% GEQ 8 exit 1 - -REM ========== This one comes from visual studio 2017 -set "VC_VER=141" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto :eof - ) -) - -@setlocal -call "%VS15VARSALL%" x64 - -set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%" - -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -@endlocal diff --git a/packaging/vs2017/meta.yaml b/packaging/vs2017/meta.yaml deleted file mode 100644 index 1f56952..0000000 --- a/packaging/vs2017/meta.yaml +++ /dev/null @@ -1,24 +0,0 @@ -{% set vcver="14.1" %} -{% set vcfeature="14" %} -{% set vsyear="2017" %} -{% set fullver="15.4.27004.2010" %} - -package: - name: vs{{ vsyear }} - version: {{ fullver }} - -build: - skip: True [not win] - script_env: - - VSDEVCMD_ARGS # [win] - -outputs: - - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }} - script: install_activate.bat - track_features: - # VS 2017 is binary-compatible with VS 2015/vc14. Tools are "v141". 
- strong: - - vc{{ vcfeature }} - about: - summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler - license: BSD 3-clause diff --git a/packaging/vs2019/activate.bat b/packaging/vs2019/activate.bat deleted file mode 100644 index 6f607ba..0000000 --- a/packaging/vs2019/activate.bat +++ /dev/null @@ -1,44 +0,0 @@ -:: Set env vars that tell distutils to use the compiler that we put on path -SET DISTUTILS_USE_SDK=1 -SET MSSdk=1 - -SET "VS_VERSION=16.0" -SET "VS_MAJOR=16" -SET "VS_YEAR=2019" - -set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out" -set "MSYS2_ENV_CONV_EXCL=CL" - -:: For Python 3.5+, ensure that we link with the dynamic runtime. See -:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info -set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VSINSTALLDIR=%%i\" - goto :vswhere - ) -) - -:vswhere - -:: Shorten PATH to avoid the `input line too long` error. -SET MyPath=%PATH% - -setlocal EnableDelayedExpansion - -SET TempPath="%MyPath:;=";"%" -SET var= -FOR %%a IN (%TempPath%) DO ( - IF EXIST %%~sa ( - SET "var=!var!;%%~sa" - ) -) - -set "TempPath=!var:~1!" -endlocal & set "PATH=%TempPath%" - -:: Shorten current directory too -FOR %%A IN (.) 
DO CD "%%~sA" - -:: other things added by install_activate.bat at package build time diff --git a/packaging/vs2019/conda_build_config.yaml b/packaging/vs2019/conda_build_config.yaml deleted file mode 100644 index 358052e..0000000 --- a/packaging/vs2019/conda_build_config.yaml +++ /dev/null @@ -1,24 +0,0 @@ -blas_impl: - - mkl # [x86_64] -c_compiler: - - vs2019 # [win] -cxx_compiler: - - vs2019 # [win] -python: - - 3.5 - - 3.6 -# This differs from target_platform in that it determines what subdir the compiler -# will target, not what subdir the compiler package will be itself. -# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 -# code on win-64 miniconda. -cross_compiler_target_platform: - - win-64 # [win] -target_platform: - - win-64 # [win] -vc: - - 14 -zip_keys: - - # [win] - - vc # [win] - - c_compiler # [win] - - cxx_compiler # [win] diff --git a/packaging/vs2019/install_activate.bat b/packaging/vs2019/install_activate.bat deleted file mode 100644 index 3c38253..0000000 --- a/packaging/vs2019/install_activate.bat +++ /dev/null @@ -1,30 +0,0 @@ -set YEAR=2019 -set VER=16 - -mkdir "%PREFIX%\etc\conda\activate.d" -COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - -IF "%cross_compiler_target_platform%" == "win-64" ( - set "target_platform=amd64" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - IF "%VSDEVCMD_ARGS%" == "" ( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) ELSE 
( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) else ( - set "target_platform=x86" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd - ) - diff --git a/packaging/vs2019/install_runtime.bat b/packaging/vs2019/install_runtime.bat deleted file mode 100644 index e09a5cc..0000000 --- a/packaging/vs2019/install_runtime.bat +++ /dev/null @@ -1,49 +0,0 @@ -set VC_PATH=x86 -if "%ARCH%"=="64" ( - set VC_PATH=x64 -) - -set MSC_VER=2019 - -rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015 -rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO ( -rem set SP=%%A -rem ) - -rem if not "%SP%" == "%PKG_VERSION%" ( -rem echo "Version detected from registry: %SP%" -rem echo "does not match version of package being built (%PKG_VERSION%)" -rem echo "Do you have current updates for VS 2015 installed?" -rem exit 1 -rem ) - - -REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below! 
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E -robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E -if %ERRORLEVEL% GEQ 8 exit 1 - -REM ========== This one comes from visual studio 2019 -set "VC_VER=142" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto :eof - ) -) - -@setlocal -call "%VS15VARSALL%" x64 - -set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%" - -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -@endlocal diff --git a/packaging/vs2019/meta.yaml b/packaging/vs2019/meta.yaml deleted file mode 100644 index 94a0ed4..0000000 --- a/packaging/vs2019/meta.yaml +++ /dev/null @@ -1,24 +0,0 @@ -{% set vcver="14.2" %} -{% set vcfeature="14" %} -{% set vsyear="2019" %} -{% set fullver="15.4.27004.2010" %} - -package: - name: vs{{ vsyear }} - version: {{ fullver }} - -build: - skip: True [not win] - script_env: - - VSDEVCMD_ARGS # [win] - -outputs: - - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }} - script: install_activate.bat - track_features: - # VS 2019 is binary-compatible with VS 2017/vc 14.1 and 2015/vc14. Tools are "v142". 
- strong: - - vc{{ vcfeature }} - about: - summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler - license: BSD 3-clause diff --git a/packaging/wheel/linux_manywheel.sh b/packaging/wheel/linux_manywheel.sh deleted file mode 100644 index d6471aa..0000000 --- a/packaging/wheel/linux_manywheel.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash -set -ex - -if [ "$#" -ne 1 ]; then - echo "Illegal number of parameters. Pass cuda version" - echo "CUDA version should be cu92, cu100 or cpu" - exit 1 -fi -export CUVER="$1" # cu[0-9]* cpu - -if [[ "$CUVER" == "cu102" ]]; then - cu_suffix="" -else - cu_suffix="+$CUVER" -fi - -export TORCHCSPRNG_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")${cu_suffix}" -export TORCHCSPRNG_BUILD_NUMBER="1" -export TORCHCSPRNG_LOCAL_VERSION_LABEL="$CUVER" -export OUT_DIR="/remote/$CUVER" - -pushd /opt/python -DESIRED_PYTHON=(*/) -popd -for desired_py in "${DESIRED_PYTHON[@]}"; do - python_installations+=("/opt/python/$desired_py") -done - -OLD_PATH=$PATH -cd /tmp -rm -rf csprng -git clone https://github.com/pytorch/csprng - -cd /tmp/csprng - -for PYDIR in "${python_installations[@]}"; do - export PATH=$PYDIR/bin:$OLD_PATH - pip install --upgrade pip - pip install numpy pyyaml future - - pip uninstall -y torch || true - pip uninstall -y torch_nightly || true - - export TORCHCSPRNG_PYTORCH_DEPENDENCY_NAME=torch_nightly - pip install torch_nightly -f https://download.pytorch.org/whl/nightly/$CUVER/torch_nightly.html - # CPU/CUDA variants of PyTorch have ABI compatible PyTorch for - # the CPU only bits. Therefore, we - # strip off the local package qualifier, but ONLY if we're - # doing a CPU build. 
- if [[ "$CUVER" == "cpu" ]]; then - export TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//' | sed 's/+.\+//')" - else - export TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//')" - fi - echo "Building against ${TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION}" - - pip install ninja - python setup.py clean - python setup.py bdist_wheel - mkdir -p $OUT_DIR - cp dist/*.whl $OUT_DIR/ -done diff --git a/packaging/wheel/osx_wheel.sh b/packaging/wheel/osx_wheel.sh deleted file mode 100644 index 566f956..0000000 --- a/packaging/wheel/osx_wheel.sh +++ /dev/null @@ -1,52 +0,0 @@ -if [[ ":$PATH:" == *"conda"* ]]; then - echo "existing anaconda install in PATH, remove it and run script" - exit 1 -fi -# download and activate anaconda -rm -rf ~/minconda_wheel_env_tmp -wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh && \ - chmod +x Miniconda3-latest-MacOSX-x86_64.sh && \ - ./Miniconda3-latest-MacOSX-x86_64.sh -b -p ~/minconda_wheel_env_tmp && \ - rm Miniconda3-latest-MacOSX-x86_64.sh - -. 
~/minconda_wheel_env_tmp/bin/activate - - -export TORCHCSPRNG_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")" -export TORCHCSPRNG_BUILD_NUMBER="1" -export OUT_DIR=~/torchcsprng_wheels - -export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ - -pushd /tmp -rm -rf csprng -git clone https://github.com/pytorch/csprng -pushd csprng - -desired_pythons=( "2.7" "3.5" "3.6" "3.7" ) -# for each python -for desired_python in "${desired_pythons[@]}" -do - # create and activate python env - env_name="env$desired_python" - conda create -yn $env_name python="$desired_python" - conda activate $env_name - - pip uninstall -y torch || true - pip uninstall -y torch_nightly || true - - export TORCHCSPRNG_PYTORCH_DEPENDENCY_NAME=torch_nightly - pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html - export TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: *//')" - echo "Building against ${TORCHAUDIO_PYTORCH_DEPENDENCY_VERSION}" - - # install torchcsprng dependencies - pip install ninja scipy pytest pycrypto - - python setup.py clean - python setup.py bdist_wheel - mkdir -p $OUT_DIR - cp dist/*.whl $OUT_DIR/ -done -popd -popd diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py deleted file mode 100644 index fd92b68..0000000 --- a/packaging/wheel/relocate.py +++ /dev/null @@ -1,408 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Helper script to package wheels and relocate binaries.""" - -import glob -import hashlib -import io - -# Standard library imports -import os -import os.path as osp -import platform -import shutil -import subprocess -import sys -import zipfile -from base64 import urlsafe_b64encode - -# Third party imports -if sys.platform == "linux": - from auditwheel.lddtree import lddtree -from wheel.bdist_wheel import get_abi_tag - - -ALLOWLIST = { - "libgcc_s.so.1", - "libstdc++.so.6", - "libm.so.6", - "libdl.so.2", - "librt.so.1", - "libc.so.6", - "libnsl.so.1", - "libutil.so.1", 
- "libpthread.so.0", - "libresolv.so.2", - "libX11.so.6", - "libXext.so.6", - "libXrender.so.1", - "libICE.so.6", - "libSM.so.6", - "libGL.so.1", - "libgobject-2.0.so.0", - "libgthread-2.0.so.0", - "libglib-2.0.so.0", - "ld-linux-x86-64.so.2", - "ld-2.17.so", -} - -WINDOWS_ALLOWLIST = { - "MSVCP140.dll", - "KERNEL32.dll", - "VCRUNTIME140_1.dll", - "VCRUNTIME140.dll", - "api-ms-win-crt-heap-l1-1-0.dll", - "api-ms-win-crt-runtime-l1-1-0.dll", - "api-ms-win-crt-stdio-l1-1-0.dll", - "api-ms-win-crt-filesystem-l1-1-0.dll", - "api-ms-win-crt-string-l1-1-0.dll", - "api-ms-win-crt-environment-l1-1-0.dll", - "api-ms-win-crt-math-l1-1-0.dll", - "api-ms-win-crt-convert-l1-1-0.dll", -} - - -HERE = osp.dirname(osp.abspath(__file__)) -PACKAGE_ROOT = osp.dirname(osp.dirname(HERE)) -PLATFORM_ARCH = platform.machine() -PYTHON_VERSION = sys.version_info - - -def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE): - """Yield pieces of data from a file-like object until EOF.""" - while True: - chunk = file.read(size) - if not chunk: - break - yield chunk - - -def rehash(path, blocksize=1 << 20): - """Return (hash, length) for path using hashlib.sha256()""" - h = hashlib.sha256() - length = 0 - with open(path, "rb") as f: - for block in read_chunks(f, size=blocksize): - length += len(block) - h.update(block) - digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=") - # unicode/str python2 issues - return (digest, str(length)) # type: ignore - - -def unzip_file(file, dest): - """Decompress zip `file` into directory `dest`.""" - with zipfile.ZipFile(file, "r") as zip_ref: - zip_ref.extractall(dest) - - -def is_program_installed(basename): - """ - Return program absolute path if installed in PATH. - Otherwise, return None - On macOS systems, a .app is considered installed if - it exists. 
- """ - if sys.platform == "darwin" and basename.endswith(".app") and osp.exists(basename): - return basename - - for path in os.environ["PATH"].split(os.pathsep): - abspath = osp.join(path, basename) - if osp.isfile(abspath): - return abspath - - -def find_program(basename): - """ - Find program in PATH and return absolute path - Try adding .exe or .bat to basename on Windows platforms - (return None if not found) - """ - names = [basename] - if os.name == "nt": - # Windows platforms - extensions = (".exe", ".bat", ".cmd", ".dll") - if not basename.endswith(extensions): - names = [basename + ext for ext in extensions] + [basename] - for name in names: - path = is_program_installed(name) - if path: - return path - - -def patch_new_path(library_path, new_dir): - library = osp.basename(library_path) - name, *rest = library.split(".") - rest = ".".join(rest) - hash_id = hashlib.sha256(library_path.encode("utf-8")).hexdigest()[:8] - new_name = ".".join([name, hash_id, rest]) - return osp.join(new_dir, new_name) - - -def find_dll_dependencies(dumpbin, binary): - out = subprocess.run([dumpbin, "/dependents", binary], stdout=subprocess.PIPE) - out = out.stdout.strip().decode("utf-8") - start_index = out.find("dependencies:") + len("dependencies:") - end_index = out.find("Summary") - dlls = out[start_index:end_index].strip() - dlls = dlls.split(os.linesep) - dlls = [dll.strip() for dll in dlls] - return dlls - - -def relocate_elf_library(patchelf, output_dir, output_library, binary): - """ - Relocate an ELF shared library to be packaged on a wheel. - - Given a shared library, find the transitive closure of its dependencies, - rename and copy them into the wheel while updating their respective rpaths. 
- """ - - print("Relocating {0}".format(binary)) - binary_path = osp.join(output_library, binary) - - ld_tree = lddtree(binary_path) - tree_libs = ld_tree["libs"] - - binary_queue = [(n, binary) for n in ld_tree["needed"]] - binary_paths = {binary: binary_path} - binary_dependencies = {} - - while binary_queue != []: - library, parent = binary_queue.pop(0) - library_info = tree_libs[library] - print(library) - - if library_info["path"] is None: - print("Omitting {0}".format(library)) - continue - - if library in ALLOWLIST: - # Omit glibc/gcc/system libraries - print("Omitting {0}".format(library)) - continue - - parent_dependencies = binary_dependencies.get(parent, []) - parent_dependencies.append(library) - binary_dependencies[parent] = parent_dependencies - - if library in binary_paths: - continue - - binary_paths[library] = library_info["path"] - binary_queue += [(n, library) for n in library_info["needed"]] - - print("Copying dependencies to wheel directory") - new_libraries_path = osp.join(output_dir, "torchcsprng.libs") - os.makedirs(new_libraries_path) - - new_names = {binary: binary_path} - - for library in binary_paths: - if library != binary: - library_path = binary_paths[library] - new_library_path = patch_new_path(library_path, new_libraries_path) - print("{0} -> {1}".format(library, new_library_path)) - shutil.copyfile(library_path, new_library_path) - new_names[library] = new_library_path - - print("Updating dependency names by new files") - for library in binary_paths: - if library != binary: - if library not in binary_dependencies: - continue - library_dependencies = binary_dependencies[library] - new_library_name = new_names[library] - for dep in library_dependencies: - new_dep = osp.basename(new_names[dep]) - print("{0}: {1} -> {2}".format(library, dep, new_dep)) - subprocess.check_output( - [patchelf, "--replace-needed", dep, new_dep, new_library_name], - cwd=new_libraries_path, - ) - - print("Updating library rpath") - subprocess.check_output( - 
[patchelf, "--set-rpath", "$ORIGIN", new_library_name], - cwd=new_libraries_path, - ) - - subprocess.check_output( - [patchelf, "--print-rpath", new_library_name], cwd=new_libraries_path - ) - - print("Update library dependencies") - library_dependencies = binary_dependencies[binary] - for dep in library_dependencies: - new_dep = osp.basename(new_names[dep]) - print("{0}: {1} -> {2}".format(binary, dep, new_dep)) - subprocess.check_output( - [patchelf, "--replace-needed", dep, new_dep, binary], cwd=output_library - ) - - print("Update library rpath") - subprocess.check_output( - [patchelf, "--set-rpath", "$ORIGIN:$ORIGIN/../torchcsprng.libs", binary_path], - cwd=output_library, - ) - - -def relocate_dll_library(dumpbin, output_dir, output_library, binary): - """ - Relocate a DLL/PE shared library to be packaged on a wheel. - - Given a shared library, find the transitive closure of its dependencies, - rename and copy them into the wheel. - """ - print("Relocating {0}".format(binary)) - binary_path = osp.join(output_library, binary) - - library_dlls = find_dll_dependencies(dumpbin, binary_path) - binary_queue = [(dll, binary) for dll in library_dlls] - binary_paths = {binary: binary_path} - binary_dependencies = {} - - while binary_queue != []: - library, parent = binary_queue.pop(0) - if library in WINDOWS_ALLOWLIST or library.startswith("api-ms-win"): - print("Omitting {0}".format(library)) - continue - - library_path = find_program(library) - if library_path is None: - print("{0} not found".format(library)) - continue - - if osp.basename(osp.dirname(library_path)) == "system32": - continue - - print("{0}: {1}".format(library, library_path)) - parent_dependencies = binary_dependencies.get(parent, []) - parent_dependencies.append(library) - binary_dependencies[parent] = parent_dependencies - - if library in binary_paths: - continue - - binary_paths[library] = library_path - downstream_dlls = find_dll_dependencies(dumpbin, library_path) - binary_queue += [(n, 
library) for n in downstream_dlls] - - print("Copying dependencies to wheel directory") - package_dir = osp.join(output_dir, "torchcsprng") - for library in binary_paths: - if library != binary: - library_path = binary_paths[library] - new_library_path = osp.join(package_dir, library) - print("{0} -> {1}".format(library, new_library_path)) - shutil.copyfile(library_path, new_library_path) - - -def compress_wheel(output_dir, wheel, wheel_dir, wheel_name): - """Create RECORD file and compress wheel distribution.""" - print("Update RECORD file in wheel") - dist_info = glob.glob(osp.join(output_dir, "*.dist-info"))[0] - record_file = osp.join(dist_info, "RECORD") - - with open(record_file, "w") as f: - for root, _, files in os.walk(output_dir): - for this_file in files: - full_file = osp.join(root, this_file) - rel_file = osp.relpath(full_file, output_dir) - if full_file == record_file: - f.write("{0},,\n".format(rel_file)) - else: - digest, size = rehash(full_file) - f.write("{0},{1},{2}\n".format(rel_file, digest, size)) - - print("Compressing wheel") - base_wheel_name = osp.join(wheel_dir, wheel_name) - shutil.make_archive(base_wheel_name, "zip", output_dir) - os.remove(wheel) - shutil.move("{0}.zip".format(base_wheel_name), wheel) - shutil.rmtree(output_dir) - - -def patch_linux(): - # Get patchelf location - patchelf = find_program("patchelf") - if patchelf is None: - raise FileNotFoundError( - "Patchelf was not found in the system, please" - " make sure that is available on the PATH." 
- ) - - # Find wheel - print("Finding wheels...") - wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl")) - output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process") - - image_binary = "image.so" - video_binary = "video_reader.so" - torchcsprng_binaries = [image_binary, video_binary] - for wheel in wheels: - if osp.exists(output_dir): - shutil.rmtree(output_dir) - - os.makedirs(output_dir) - - print("Unzipping wheel...") - wheel_file = osp.basename(wheel) - wheel_dir = osp.dirname(wheel) - print("{0}".format(wheel_file)) - wheel_name, _ = osp.splitext(wheel_file) - unzip_file(wheel, output_dir) - - print("Finding ELF dependencies...") - output_library = osp.join(output_dir, "torchcsprng") - for binary in torchcsprng_binaries: - if osp.exists(osp.join(output_library, binary)): - relocate_elf_library(patchelf, output_dir, output_library, binary) - - compress_wheel(output_dir, wheel, wheel_dir, wheel_name) - - -def patch_win(): - # Get dumpbin location - dumpbin = find_program("dumpbin") - if dumpbin is None: - raise FileNotFoundError( - "Dumpbin was not found in the system, please" - " make sure that is available on the PATH." 
- ) - - # Find wheel - print("Finding wheels...") - wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl")) - output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process") - - image_binary = "image.pyd" - video_binary = "video_reader.pyd" - torchcsprng_binaries = [image_binary, video_binary] - for wheel in wheels: - if osp.exists(output_dir): - shutil.rmtree(output_dir) - - os.makedirs(output_dir) - - print("Unzipping wheel...") - wheel_file = osp.basename(wheel) - wheel_dir = osp.dirname(wheel) - print("{0}".format(wheel_file)) - wheel_name, _ = osp.splitext(wheel_file) - unzip_file(wheel, output_dir) - - print("Finding DLL/PE dependencies...") - output_library = osp.join(output_dir, "torchcsprng") - for binary in torchcsprng_binaries: - if osp.exists(osp.join(output_library, binary)): - relocate_dll_library(dumpbin, output_dir, output_library, binary) - - compress_wheel(output_dir, wheel, wheel_dir, wheel_name) - - -if __name__ == "__main__": - if sys.platform == "linux": - patch_linux() - elif sys.platform == "win32": - patch_win() diff --git a/packaging/windows/azure-pipelines-ci.yml b/packaging/windows/azure-pipelines-ci.yml deleted file mode 100644 index 6f9f346..0000000 --- a/packaging/windows/azure-pipelines-ci.yml +++ /dev/null @@ -1,11 +0,0 @@ - -# Turn off auto builds for commits -trigger: none -pr: none - -jobs: -- template: templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CPU' - msagent: true diff --git a/packaging/windows/azure-pipelines.yml b/packaging/windows/azure-pipelines.yml deleted file mode 100644 index d024057..0000000 --- a/packaging/windows/azure-pipelines.yml +++ /dev/null @@ -1,35 +0,0 @@ - -# Turn off auto builds for commits -trigger: none -pr: none - -jobs: -- template: templates/auth_task.yml - -- template: templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CPU' - msagent: true - -- template: templates/build_task.yml - parameters: - package: 'Conda' - spec: 'CPU' - msagent: true - -- template: 
templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CUDA' - msagent: true - -- template: templates/build_task.yml - parameters: - package: 'Conda' - spec: 'CUDA' - msagent: true - -- template: templates/linux_build_task.yml - parameters: - msagent: $(ms.hosted.agent.cpu) diff --git a/packaging/windows/build_csprng.bat b/packaging/windows/build_csprng.bat deleted file mode 100644 index e6da23d..0000000 --- a/packaging/windows/build_csprng.bat +++ /dev/null @@ -1,145 +0,0 @@ -@echo off - -:: This script parses args, installs required libraries (miniconda, MKL, -:: Magma), and then delegates to cpu.bat, cuda80.bat, etc. - -IF NOT "%CUDA_VERSION%" == "" IF NOT "%TORCHCSPRNG_BUILD_VERSION%" == "" if NOT "%TORCHCSPRNG_BUILD_NUMBER%" == "" goto env_end -if "%~1"=="" goto arg_error -if "%~2"=="" goto arg_error -if "%~3"=="" goto arg_error -if NOT "%~4"=="" goto arg_error -goto arg_end - -:arg_error - -echo Illegal number of parameters. Pass cuda version, pytorch version, build number -echo CUDA version should be Mm with no dot, e.g. '80' -echo DESIRED_PYTHON should be M.m, e.g. 
'2.7' -exit /b 1 - -:arg_end - -set CUDA_VERSION=%~1 -set TORCHCSPRNG_BUILD_VERSION=%~2 -set TORCHCSPRNG_BUILD_NUMBER=%~3 - -set BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION% - -:env_end - -if NOT "%CUDA_VERSION%" == "cpu" ( - set CUDA_PREFIX=cuda%CUDA_VERSION% - set CUVER=cu%CUDA_VERSION% - set FORCE_CUDA=1 -) else ( - set CUDA_PREFIX=cpu - set CUVER=cpu -) - -set BUILD_CSPRNG=1 -REM set TORCH_WHEEL=torch -f https://download.pytorch.org/whl/%CUVER%/stable.html --no-index - -IF "%DESIRED_PYTHON%" == "" set DESIRED_PYTHON=3.5;3.6;3.7 -set DESIRED_PYTHON_PREFIX=%DESIRED_PYTHON:.=% -set DESIRED_PYTHON_PREFIX=py%DESIRED_PYTHON_PREFIX:;=;py% - -set SRC_DIR=%~dp0 -pushd %SRC_DIR% - -:: Install Miniconda3 -set "CONDA_HOME=%CD%\conda" -set "tmp_conda=%CONDA_HOME%" -set "miniconda_exe=%CD%\miniconda.exe" -rmdir /s /q conda -del miniconda.exe -curl -k https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" -call ..\conda\install_conda.bat -IF ERRORLEVEL 1 exit /b 1 -set "ORIG_PATH=%PATH%" -set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" - -:: Create a new conda environment -setlocal EnableDelayedExpansion -FOR %%v IN (%DESIRED_PYTHON%) DO ( - set PYTHON_VERSION_STR=%%v - set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! - conda remove -n py!PYTHON_VERSION_STR! --all -y || rmdir %CONDA_HOME%\envs\py!PYTHON_VERSION_STR! /s - conda create -n py!PYTHON_VERSION_STR! -y -q -c defaults -c conda-forge numpy>=1.11 mkl>=2018 python=%%v ca-certificates scipy pycrypto -) - -:: Uncomment for stable releases -:: FOR %%v IN (%DESIRED_PYTHON%) DO ( -:: set PYTHON_VERSION_STR=%%v -:: set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! 
-:: set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%" - -:: if "%CUDA_VERSION%" == "100" ( -:: set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl -:: ) else ( -:: set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0%%2B%CUVER%-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl -:: ) -:: echo Installing !TORCH_WHEEL!... -:: pip install "!TORCH_WHEEL!" -:: ) - -:: Uncomment for nightly releases -FOR %%v IN (%DESIRED_PYTHON%) DO ( - set PYTHON_VERSION_STR=%%v - set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! - set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%" - - set TORCH_WHEEL=torch --pre -f https://download.pytorch.org/whl/nightly/%CUVER%/torch_nightly.html - echo Installing !TORCH_WHEEL!... - pip install !TORCH_WHEEL! 
-) - -endlocal - -if "%DEBUG%" == "1" ( - set BUILD_TYPE=debug -) ELSE ( - set BUILD_TYPE=release -) - -:: Install sccache -if "%USE_SCCACHE%" == "1" ( - mkdir %CD%\tmp_bin - curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %CD%\tmp_bin\sccache.exe - if not "%CUDA_VERSION%" == "" ( - copy %CD%\tmp_bin\sccache.exe %CD%\tmp_bin\nvcc.exe - - set CUDA_NVCC_EXECUTABLE=%CD%\tmp_bin\nvcc - set "PATH=%CD%\tmp_bin;%PATH%" - ) -) - -for %%v in (%DESIRED_PYTHON_PREFIX%) do ( - :: Activate Python Environment - set PYTHON_PREFIX=%%v - set "PATH=%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%" - if defined INCLUDE ( - set "INCLUDE=%INCLUDE%;%CONDA_HOME%\envs\%%v\Library\include" - ) else ( - set "INCLUDE=%CONDA_HOME%\envs\%%v\Library\include" - ) - if defined LIB ( - set "LIB=%LIB%;%CONDA_HOME%\envs\%%v\Library\lib" - ) else ( - set "LIB=%CONDA_HOME%\envs\%%v\Library\lib" - ) - @setlocal - :: Set Flags - if NOT "%CUDA_VERSION%"=="cpu" ( - set CUDNN_VERSION=7 - ) - call %CUDA_PREFIX%.bat - IF ERRORLEVEL 1 exit /b 1 - call internal\test.bat - IF ERRORLEVEL 1 exit /b 1 - @endlocal -) - -set "PATH=%ORIG_PATH%" -popd - -IF ERRORLEVEL 1 exit /b 1 diff --git a/packaging/windows/cpu.bat b/packaging/windows/cpu.bat deleted file mode 100644 index 1897fb5..0000000 --- a/packaging/windows/cpu.bat +++ /dev/null @@ -1,37 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. 
- IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -echo Disabling CUDA -set NO_CUDA=1 -set USE_CUDA=0 - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy_cpu.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/cuda101.bat b/packaging/windows/cuda101.bat deleted file mode 100644 index 016baec..0000000 --- a/packaging/windows/cuda101.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V10_1%"=="" ( - echo CUDA 10.1 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_CSPRNG%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V10_1%" - set "PATH=%CUDA_PATH_V10_1%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto 
eof - -:eof diff --git a/packaging/windows/cuda102.bat b/packaging/windows/cuda102.bat deleted file mode 100644 index d5a0bdf..0000000 --- a/packaging/windows/cuda102.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V10_2%"=="" ( - echo CUDA 10.2 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_CSPRNG%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V10_2%" - set "PATH=%CUDA_PATH_V10_2%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/cuda92.bat b/packaging/windows/cuda92.bat deleted file mode 100644 index 7f520da..0000000 --- a/packaging/windows/cuda92.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. 
- IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set USE_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V9_2%"=="" ( - echo CUDA 9.2 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_CSPRNG%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V9_2%" - set "PATH=%CUDA_PATH_V9_2%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/internal/auth.bat b/packaging/windows/internal/auth.bat deleted file mode 100644 index c874bce..0000000 --- a/packaging/windows/internal/auth.bat +++ /dev/null @@ -1,46 +0,0 @@ -@echo off - -: From the following doc, the build won't be triggered if the users don't sign in daily. -: https://docs.microsoft.com/en-us/azure/devops/pipelines/build/triggers?tabs=yaml&view=vsts#my-build-didnt-run-what-happened -: To avoid this problem, we can just go through the sign in process using the following command. 
- -:auth_start - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -for /f "usebackq tokens=*" %%i in (`curl -so NUL -w "%%{http_code}" -u %VSTS_AUTH% https://dev.azure.com/pytorch`) do ( - set STATUS_CODE=%%i -) - -IF NOT "%STATUS_CODE%" == "200" ( - echo Auth retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Auth failed - goto err - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto auth_start -) ELSE ( - echo Login Attempt Succeeded - goto auth_end -) - -:err - -: Throw a warning if it fails -powershell -c "Write-Warning 'Login Attempt Failed'" - -:auth_end - -set RETRY_TIMES= -set SLEEP_TIME= -set STATUS_CODE= - -exit /b 0 diff --git a/packaging/windows/internal/build_conda.bat b/packaging/windows/internal/build_conda.bat deleted file mode 100644 index 6ffd67b..0000000 --- a/packaging/windows/internal/build_conda.bat +++ /dev/null @@ -1,15 +0,0 @@ -if "%VC_YEAR%" == "2017" set VSDEVCMD_ARGS=-vcvars_ver=14.13 -if "%VC_YEAR%" == "2017" powershell packaging/windows/internal/vs2017_install.ps1 -if errorlevel 1 exit /b 1 - -call packaging/windows/internal/cuda_install.bat -if errorlevel 1 exit /b 1 - -call packaging/windows/internal/nightly_defaults.bat Conda -if errorlevel 1 exit /b 1 - -set PYTORCH_FINAL_PACKAGE_DIR=%CD%\packaging\windows\output -if not exist "%PYTORCH_FINAL_PACKAGE_DIR%" mkdir %PYTORCH_FINAL_PACKAGE_DIR% - -bash ./packaging/conda/build_csprng.sh %CUDA_VERSION% %TORCHCSPRNG_BUILD_VERSION% %TORCHCSPRNG_BUILD_NUMBER% -if errorlevel 1 exit /b 1 diff --git a/packaging/windows/internal/build_wheels.bat b/packaging/windows/internal/build_wheels.bat deleted file mode 100644 index 876b8b0..0000000 --- a/packaging/windows/internal/build_wheels.bat +++ /dev/null @@ -1,12 +0,0 @@ -if "%VC_YEAR%" == "2017" set VSDEVCMD_ARGS=-vcvars_ver=14.13 -if "%VC_YEAR%" 
== "2017" powershell packaging/windows/internal/vs2017_install.ps1 -if errorlevel 1 exit /b 1 - -call packaging/windows/internal/cuda_install.bat -if errorlevel 1 exit /b 1 - -call packaging/windows/internal/nightly_defaults.bat Wheels -if errorlevel 1 exit /b 1 - -call packaging/windows/build_csprng.bat %CUDA_VERSION% %TORCHCSPRNG_BUILD_VERSION% %TORCHCSPRNG_BUILD_NUMBER% -if errorlevel 1 exit /b 1 diff --git a/packaging/windows/internal/check_deps.bat b/packaging/windows/internal/check_deps.bat deleted file mode 100644 index 739e568..0000000 --- a/packaging/windows/internal/check_deps.bat +++ /dev/null @@ -1,67 +0,0 @@ -@echo off - -REM Check for necessary components - -IF NOT "%PROCESSOR_ARCHITECTURE%"=="AMD64" ( - echo You should use 64 bits Windows to build and run PyTorch - exit /b 1 -) - -IF "%BUILD_CSPRNG%" == "" ( - where /q cmake.exe - - IF ERRORLEVEL 1 ( - echo CMake is required to compile PyTorch on Windows - exit /b 1 - ) -) - -IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere -IF "%VS15VCVARSALL%"=="" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -set MSSdk=1 -set DISTUTILS_USE_SDK=1 - -where /q python.exe - -IF ERRORLEVEL 1 ( - echo Python x64 3.5 or up is required to compile PyTorch on Windows - exit /b 1 -) - -for /F "usebackq delims=" %%i in (`python -c "import sys; print('{0[0]}{0[1]}'.format(sys.version_info))"`) do ( - set /a PYVER=%%i -) - -if %PYVER% LSS 35 ( - echo Warning: PyTorch for Python 2 
under Windows is experimental. - echo Python x64 3.5 or up is recommended to compile PyTorch on Windows - echo Maybe you can create a virual environment if you have conda installed: - echo ^> conda create -n test python=3.6 pyyaml mkl numpy - echo ^> activate test -) - -for /F "usebackq delims=" %%i in (`python -c "import struct;print( 8 * struct.calcsize('P'))"`) do ( - set /a PYSIZE=%%i -) - -if %PYSIZE% NEQ 64 ( - echo Python x64 3.5 or up is required to compile PyTorch on Windows - exit /b 1 -) diff --git a/packaging/windows/internal/check_opts.bat b/packaging/windows/internal/check_opts.bat deleted file mode 100644 index 003ad92..0000000 --- a/packaging/windows/internal/check_opts.bat +++ /dev/null @@ -1,33 +0,0 @@ -@echo off - -REM Check for optional components - -where /q ninja.exe - -IF NOT ERRORLEVEL 1 ( - echo Ninja found, using it to speed up builds - set CMAKE_GENERATOR=Ninja -) - -where /q clcache.exe - -IF NOT ERRORLEVEL 1 ( - echo clcache found, using it to speed up builds - set CC=clcache - set CXX=clcache -) - -where /q sccache.exe - -IF NOT ERRORLEVEL 1 ( - echo sccache found, using it to speed up builds - set CC=sccache cl - set CXX=sccache cl -) - -IF exist "%MKLProductDir%\mkl\lib\intel64_win" ( - echo MKL found, adding it to build - set "LIB=%MKLProductDir%\mkl\lib\intel64_win;%MKLProductDir%\compiler\lib\intel64_win;%LIB%"; -) - -exit /b 0 diff --git a/packaging/windows/internal/clean.bat b/packaging/windows/internal/clean.bat deleted file mode 100644 index 7489640..0000000 --- a/packaging/windows/internal/clean.bat +++ /dev/null @@ -1,5 +0,0 @@ -@echo off - -cd %MODULE_NAME% -python setup.py clean -cd .. diff --git a/packaging/windows/internal/clone.bat b/packaging/windows/internal/clone.bat deleted file mode 100644 index 758527c..0000000 --- a/packaging/windows/internal/clone.bat +++ /dev/null @@ -1,56 +0,0 @@ -@echo off - -:: The conda and wheels jobs are seperated on Windows, so we don't need to clone again. 
-IF "%BUILD_CSPRNG%" == "" ( - if exist "%NIGHTLIES_PYTORCH_ROOT%" ( - xcopy /E /Y /Q "%NIGHTLIES_PYTORCH_ROOT%" pytorch\ - cd pytorch - goto submodule - ) -) - -git clone https://github.com/%PYTORCH_REPO%/%MODULE_NAME% - -cd %MODULE_NAME% - -IF NOT "%BUILD_CSPRNG%" == "" goto latest_end - -IF "%PYTORCH_BRANCH%" == "latest" ( goto latest_start ) else ( goto latest_end ) - -:latest_start - -if "%NIGHTLIES_DATE%" == "" ( goto date_start ) else ( goto date_end ) - -:date_start - -set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'" -set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'" - -FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i -FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i - -:date_end - -if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2% - -:: Switch to the latest commit by 11:59 yesterday -echo PYTORCH_BRANCH is set to latest so I will find the last commit -echo before 0:00 midnight on %NIGHTLIES_DATE% -set git_date=%NIGHTLIES_DATE:_=-% -FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i -echo Setting PYTORCH_BRANCH to %last_commit% since that was the last -echo commit before %NIGHTLIES_DATE% -set PYTORCH_BRANCH=%last_commit% - -:latest_end - -IF "%PYTORCH_BRANCH%" == "" ( - set PYTORCH_BRANCH=v%TORCHCSPRNG_BUILD_VERSION% -) -git checkout %PYTORCH_BRANCH% -IF ERRORLEVEL 1 git checkout tags/%PYTORCH_BRANCH% - -:submodule - -git submodule update --init --recursive -IF ERRORLEVEL 1 exit /b 1 diff --git a/packaging/windows/internal/copy.bat b/packaging/windows/internal/copy.bat deleted file 
mode 100644 index b4aa397..0000000 --- a/packaging/windows/internal/copy.bat +++ /dev/null @@ -1,13 +0,0 @@ -copy "%CUDA_PATH%\bin\cusparse64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cublas64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cudart64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\curand64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cufft64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cufftw64_%CUDA_VERSION%.dll*" pytorch\torch\lib - -copy "%CUDA_PATH%\bin\cudnn64_%CUDNN_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\nvrtc64_%CUDA_VERSION%*.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\nvrtc-builtins64_%CUDA_VERSION%.dll*" pytorch\torch\lib - -copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" pytorch\torch\lib -copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib diff --git a/packaging/windows/internal/copy_cpu.bat b/packaging/windows/internal/copy_cpu.bat deleted file mode 100644 index f5b9d11..0000000 --- a/packaging/windows/internal/copy_cpu.bat +++ /dev/null @@ -1 +0,0 @@ -copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib diff --git a/packaging/windows/internal/cuda_install.bat b/packaging/windows/internal/cuda_install.bat deleted file mode 100644 index 9ca08e1..0000000 --- a/packaging/windows/internal/cuda_install.bat +++ /dev/null @@ -1,201 +0,0 @@ -@echo on - -if "%CU_VERSION%" == "cpu" ( - echo Skipping for CPU builds - exit /b 0 -) - -set SRC_DIR=%~dp0\.. 
- -if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build" - -set /a CUDA_VER=%CU_VERSION:cu=% -set CUDA_VER_MAJOR=%CUDA_VER:~0,-1% -set CUDA_VER_MINOR=%CUDA_VER:~-1,1% -set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR% - -if %CUDA_VER% EQU 92 goto cuda92 -if %CUDA_VER% EQU 100 goto cuda100 -if %CUDA_VER% EQU 101 goto cuda101 -if %CUDA_VER% EQU 102 goto cuda102 -if %CUDA_VER% EQU 110 goto cuda110 -if %CUDA_VER% EQU 111 goto cuda111 -if %CUDA_VER% EQU 112 goto cuda112 - -echo CUDA %CUDA_VERSION_STR% is not supported -exit /b 1 - -:cuda92 -if not exist "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_9.2.148_win10.exe --output "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" - set "ARGS=nvcc_9.2 cuobjdump_9.2 nvprune_9.2 cupti_9.2 cublas_9.2 cublas_dev_9.2 cudart_9.2 cufft_9.2 cufft_dev_9.2 curand_9.2 curand_dev_9.2 cusolver_9.2 cusolver_dev_9.2 cusparse_9.2 cusparse_dev_9.2 nvgraph_9.2 nvgraph_dev_9.2 npp_9.2 npp_dev_9.2 nvrtc_9.2 nvrtc_dev_9.2 nvml_dev_9.2" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-9.2-windows10-x64-v7.2.1.38.zip --output "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" -) - -goto cuda_common - -:cuda100 - -if not exist "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_10.0.130_411.31_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" - set "ARGS=nvcc_10.0 cuobjdump_10.0 nvprune_10.0 cupti_10.0 cublas_10.0 cublas_dev_10.0 cudart_10.0 cufft_10.0 cufft_dev_10.0 
curand_10.0 curand_dev_10.0 cusolver_10.0 cusolver_dev_10.0 cusparse_10.0 cusparse_dev_10.0 nvgraph_10.0 nvgraph_dev_10.0 npp_10.0 npp_dev_10.0 nvrtc_10.0 nvrtc_dev_10.0 nvml_dev_10.0" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-10.0-windows10-x64-v7.4.1.5.zip --output "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" -) - -goto cuda_common - -:cuda101 - -if not exist "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.1.243_426.00_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" - set "ARGS=nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip --output "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" -) - -goto cuda_common - -:cuda102 - -if not exist "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.2.89_441.22_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" - set "ARGS=nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 
cublas_dev_10.2 cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.2-windows10-x64-v7.6.5.32.zip --output "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" -) - -goto cuda_common - -:cuda110 - -if not exist "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.0.2_451.48_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" - set "ARGS=nvcc_11.0 cuobjdump_11.0 nvprune_11.0 nvprof_11.0 cupti_11.0 cublas_11.0 cublas_dev_11.0 cudart_11.0 cufft_11.0 cufft_dev_11.0 curand_11.0 curand_dev_11.0 cusolver_11.0 cusolver_dev_11.0 cusparse_11.0 cusparse_dev_11.0 npp_11.0 npp_dev_11.0 nvrtc_11.0 nvrtc_dev_11.0 nvml_dev_11.0" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.0-windows-x64-v8.0.4.30.zip --output "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" -) - -goto cuda_common - -:cuda111 - -if not exist "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.1.0_456.43_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" - set "ARGS=nvcc_11.1 cuobjdump_11.1 nvprune_11.1 
nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1" -) - -@REM There is no downloadable driver for Tesla on CUDA 11.1 yet. We will use -@REM the driver inside CUDA -if "%JOB_EXECUTOR%" == "windows-with-nvidia-gpu" set "ARGS=%ARGS% Display.Driver" - -if not exist "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.1-windows-x64-v8.0.5.39.zip --output "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" -) - -goto cuda_common - -:cuda112 - -if not exist "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.2.0_460.89_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" - set "ARGS=nvcc_11.2 cuobjdump_11.2 nvprune_11.2 nvprof_11.2 cupti_11.2 cublas_11.2 cublas_dev_11.2 cudart_11.2 cufft_11.2 cufft_dev_11.2 curand_11.2 curand_dev_11.2 cusolver_11.2 cusolver_dev_11.2 cusparse_11.2 cusparse_dev_11.2 npp_11.2 npp_dev_11.2 nvrtc_11.2 nvrtc_dev_11.2 nvml_dev_11.2" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" ( - curl -k -L http://s3.amazonaws.com/ossci-windows/cudnn-11.2-windows-x64-v8.1.0.77.zip --output "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" -) - -goto cuda_common - -:cuda_common - -if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" ( - curl -k -L https://www.dropbox.com/s/9mcolalfdj4n979/NvToolsExt.7z?dl=1 --output "%SRC_DIR%\temp_build\NvToolsExt.7z" - if 
errorlevel 1 exit /b 1 -) - -if not exist "%SRC_DIR%\temp_build\gpu_driver_dlls.7z" ( - curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "%SRC_DIR%\temp_build\gpu_driver_dlls.zip" - if errorlevel 1 exit /b 1 -) - -echo Installing CUDA toolkit... -7z x %CUDA_SETUP_FILE% -o"%SRC_DIR%\temp_build\cuda" -pushd "%SRC_DIR%\temp_build\cuda" -start /wait setup.exe -s %ARGS% -popd - -echo Installing VS integration... -xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\Common7\IDE\VC\VCTargets\BuildCustomizations" - -echo Installing NvToolsExt... -7z x %SRC_DIR%\temp_build\NvToolsExt.7z -o"%SRC_DIR%\temp_build\NvToolsExt" -mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" -mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include" -mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64" -xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\bin\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" -xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\include\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include" -xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\lib\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64" - -echo Setting up environment... -set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\libnvvp;%PATH%" -set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%" -set "CUDA_PATH_V%CUDA_VER_MAJOR%_%CUDA_VER_MINOR%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%" -set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" - -if not exist "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" ( - echo CUDA %CUDA_VERSION_STR% installed failed. 
- exit /b 1 -) - -echo Installing cuDNN... -7z x %CUDNN_SETUP_FILE% -o"%SRC_DIR%\temp_build\cudnn" -xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\bin\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin" -xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\lib\x64\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\lib\x64" -xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\include\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\include" - -echo Installing GPU driver DLLs -7z x %SRC_DIR%\temp_build\gpu_driver_dlls.zip -o"C:\Windows\System32" - -echo Cleaning temp files -rd /s /q "%SRC_DIR%\temp_build" || ver > nul diff --git a/packaging/windows/internal/dep_install.bat b/packaging/windows/internal/dep_install.bat deleted file mode 100644 index db665a9..0000000 --- a/packaging/windows/internal/dep_install.bat +++ /dev/null @@ -1,14 +0,0 @@ -@echo off - -REM curl -k https://www.7-zip.org/a/7z1805-x64.exe -O -REM if errorlevel 1 exit /b 1 - -REM start /wait 7z1805-x64.exe /S -REM if errorlevel 1 exit /b 1 - -REM set "PATH=%ProgramFiles%\7-Zip;%PATH%" - -choco feature disable --name showDownloadProgress -choco feature enable --name allowGlobalConfirmation - -choco install curl 7zip diff --git a/packaging/windows/internal/env_fix.bat b/packaging/windows/internal/env_fix.bat deleted file mode 100644 index dd0aaf5..0000000 --- a/packaging/windows/internal/env_fix.bat +++ /dev/null @@ -1,31 +0,0 @@ -@echo off - -:: Caution: Please don't use this script locally -:: It may destroy your build environment. 
- -setlocal - -IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere - -IF "%VS15VCVARSALL%"=="" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -call "%VS15VCVARSALL%" x86_amd64 -for /f "usebackq tokens=*" %%i in (`where link.exe`) do move "%%i" "%%i.bak" - -endlocal diff --git a/packaging/windows/internal/nightly_defaults.bat b/packaging/windows/internal/nightly_defaults.bat deleted file mode 100644 index 2b5ca5c..0000000 --- a/packaging/windows/internal/nightly_defaults.bat +++ /dev/null @@ -1,200 +0,0 @@ -@echo on - -if "%~1"=="" goto arg_error -if NOT "%~2"=="" goto arg_error -goto arg_end - -:arg_error - -echo Illegal number of parameters. Pass packge type `Conda` or `Wheels`. -exit /b 1 - -:arg_end - -echo "nightly_defaults.bat at %CD% starting at %DATE%" - -set SRC_DIR=%~dp0\.. - -:: NIGHTLIES_FOLDER -:: N.B. this is also defined in cron_start.sh -:: An arbitrary root folder to store all nightlies folders, each of which is a -:: parent level date folder with separate subdirs for logs, wheels, conda -:: packages, etc. This should be kept the same across all scripts called in a -:: cron job, so it only has a default value in the top-most script -:: build_cron.sh to avoid the default values from diverging. -if "%NIGHTLIES_FOLDER%" == "" set "NIGHTLIES_FOLDER=%SRC_DIR%" - -:: NIGHTLIES_DATE -:: N.B. this is also defined in cron_start.sh -:: The date in YYYY_mm_dd format that we are building for. 
If this is not -:: already set, then this will first try to find the date of the nightlies -:: folder that this builder repo exists in; e.g. if this script exists in -:: some_dir/2019_09_04/builder/cron/ then this will be set to 2019_09_04 (must -:: match YYYY_mm_dd). This is for convenience when debugging/uploading past -:: dates, so that you don't have to set NIGHTLIES_DATE yourself. If a date -:: folder cannot be found in that exact location, then this will default to -:: the current date. - - -if "%NIGHTLIES_DATE%" == "" ( goto date_start ) else ( goto date_end ) - -:date_start - -set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'" -set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'" - -FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i -FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i - -:date_end - -if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2% - -:: Used in lots of places as the root dir to store all conda/wheel/manywheel -:: packages as well as logs for the day -set today=%NIGHTLIES_FOLDER%\%NIGHTLIES_DATE% -mkdir "%today%" || ver >nul - - -::############################################################################# -:: Add new configuration variables below this line. 'today' should always be -:: defined ASAP to avoid weird errors -::############################################################################# - - -:: List of people to email when things go wrong. 
This is passed directly to -:: `mail -t` -:: TODO: Not supported yet -if "%NIGHTLIES_EMAIL_LIST%" == "" set NIGHTLIES_EMAIL_LIST=peterghost86@gmail.com - -:: PYTORCH_CREDENTIALS_FILE -:: A bash file that exports credentials needed to upload to aws and anaconda. -:: Needed variables are PYTORCH_ANACONDA_USERNAME, PYTORCH_ANACONDA_PASSWORD, -:: AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY. Or it can just export the AWS -:: keys and then prepend a logged-in conda installation to the path. -:: TODO: Not supported yet -if "%PYTORCH_CREDENTIALS_FILE%" == "" set PYTORCH_CREDENTIALS_FILE=/c/Users/administrator/nightlies/credentials.sh - -:: Location of the temporary miniconda that is downloaded to install conda-build -:: and aws to upload finished packages TODO this is messy to install this in -:: upload.sh and later use it in upload_logs.sh -if "%CONDA_UPLOADER_INSTALLATION%" == "" set "CONDA_UPLOADER_INSTALLATION=%today%\miniconda" - -:: N.B. BUILDER_REPO and BUILDER_BRANCH are both set in cron_start.sh, as that -:: is the script that actually clones the builder repo that /this/ script is -:: running from. -pushd "%SRC_DIR%\.." -set NIGHTLIES_BUILDER_ROOT=%CD% -popd - -:: The shared pytorch repo to be used by all builds -if "%NIGHTLIES_PYTORCH_ROOT%" == "" set "NIGHTLIES_PYTORCH_ROOT=%today%\csprng" - -:: PYTORCH_REPO -:: The Github org/user whose fork of Pytorch to check out (git clone -:: https://github.com//pytorch.git). This will always be cloned -:: fresh to build with. Default is 'pytorch' -if "%PYTORCH_REPO%" == "" set PYTORCH_REPO=pytorch - -:: PYTORCH_BRANCH -:: The branch of Pytorch to checkout for building (git checkout ). -:: This can either be the name of the branch (e.g. git checkout -:: my_branch_name) or can be a git commit (git checkout 4b2674n...). 
Default -:: is 'latest', which is a special term that signals to pull the last commit -:: before 0:00 midnight on the NIGHTLIES_DATE -if "%PYTORCH_BRANCH%" == "" set PYTORCH_BRANCH=nightly - -:: Clone the requested pytorch checkout -if exist "%NIGHTLIES_PYTORCH_ROOT%" ( goto clone_end ) else ( goto clone_start ) - -:clone_start - -git clone --recursive "https://github.com/%PYTORCH_REPO%/csprng.git" "%NIGHTLIES_PYTORCH_ROOT%" -pushd "%NIGHTLIES_PYTORCH_ROOT%" - -if "%PYTORCH_BRANCH%" == "latest" ( goto latest_start ) else ( goto latest_end ) - -:latest_start - -:: Switch to the latest commit by 11:59 yesterday -echo PYTORCH_BRANCH is set to latest so I will find the last commit -echo before 0:00 midnight on %NIGHTLIES_DATE% -set git_date=%NIGHTLIES_DATE:_=-% -FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i -echo Setting PYTORCH_BRANCH to %last_commit% since that was the last -echo commit before %NIGHTLIES_DATE% -set PYTORCH_BRANCH=%last_commit% - -:latest_end - -git checkout "%PYTORCH_BRANCH%" -git submodule update -popd - -:clone_end - -if "%CUDA_VERSION%" == "cpu" ( - set _DESIRED_CUDA=cpu -) else ( - set _DESIRED_CUDA=cu%CUDA_VERSION% -) - -:: PYTORCH_BUILD_VERSION -:: The actual version string. Used in conda like -:: pytorch-nightly==1.0.0.dev20180908 -:: or in manylinux like -:: torch_nightly-1.0.0.dev20180908-cp27-cp27m-linux_x86_64.whl -if "%TORCHCSPRNG_BUILD_VERSION%" == "" set TORCHCSPRNG_BUILD_VERSION=0.9.0.dev%NIGHTLIES_DATE_COMPACT% - -if "%~1" == "Wheels" ( - if not "%CUDA_VERSION%" == "102" ( - set TORCHCSPRNG_BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION%+%_DESIRED_CUDA% - ) -) - -:: PYTORCH_BUILD_NUMBER -:: This is usually the number 1. If more than one build is uploaded for the -:: same version/date, then this can be incremented to 2,3 etc in which case -:: '.post2' will be appended to the version string of the package. 
This can -:: be set to '0' only if OVERRIDE_PACKAGE_VERSION is being used to bypass -:: all the version string logic in downstream scripts. Since we use the -:: override below, exporting this shouldn't actually matter. -if "%TORCHCSPRNG_BUILD_NUMBER%" == "" set /a TORCHCSPRNG_BUILD_NUMBER=1 -if %TORCHCSPRNG_BUILD_NUMBER% GTR 1 set TORCHCSPRNG_BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION%%TORCHCSPRNG_BUILD_NUMBER% - -:: The nightly builds use their own versioning logic, so we override whatever -:: logic is in setup.py or other scripts -:: TODO: Not supported yet -set OVERRIDE_PACKAGE_VERSION=%TORCHCSPRNG_BUILD_VERSION% -set BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION% - -:: Build folder for conda builds to use -if "%TORCH_CONDA_BUILD_FOLDER%" == "" set TORCH_CONDA_BUILD_FOLDER=torchcsprng - -:: TORCH_PACKAGE_NAME -:: The name of the package to upload. This should probably be pytorch or -:: pytorch-nightly. N.B. that pip will change all '-' to '_' but conda will -:: not. This is dealt with in downstream scripts. -:: TODO: Not supported yet -if "%TORCH_PACKAGE_NAME%" == "" set TORCH_PACKAGE_NAME=torchcsprng - -:: PIP_UPLOAD_FOLDER should end in a slash. This is to handle it being empty -:: (when uploading to e.g. whl/cpu/) and also to handle nightlies (when -:: uploading to e.g. /whl/nightly/cpu) -:: TODO: Not supported yet -if "%PIP_UPLOAD_FOLDER%" == "" set "PIP_UPLOAD_FOLDER=nightly\" - -:: The location of the binary_sizes dir in s3 is hardcoded into -:: upload_binary_sizes.sh - -:: DAYS_TO_KEEP -:: How many days to keep around for clean.sh. Build folders older than this -:: will be purged at the end of cron jobs. '1' means to keep only the current -:: day. Values less than 1 are not allowed. The default is 5. -:: TODO: Not supported yet -if "%DAYS_TO_KEEP%" == "" set /a DAYS_TO_KEEP=5 -if %DAYS_TO_KEEP% LSS 1 ( - echo DAYS_TO_KEEP cannot be less than 1. 
- echo A value of 1 means to only keep the build for today - exit /b 1 -) diff --git a/packaging/windows/internal/publish.bat b/packaging/windows/internal/publish.bat deleted file mode 100644 index 7e820d7..0000000 --- a/packaging/windows/internal/publish.bat +++ /dev/null @@ -1,89 +0,0 @@ -@echo off - -set SRC_DIR=%~dp0 -pushd %SRC_DIR% - -if NOT "%CUDA_VERSION%" == "cpu" ( - set PACKAGE_SUFFIX=_cuda%CUDA_VERSION% -) else ( - set PACKAGE_SUFFIX= -) - -if "%PACKAGEFULLNAME%" == "Conda" ( - set PACKAGE=conda -) else ( - set PACKAGE=wheels -) - -if not defined PACKAGE_SUFFIX ( - set PUBLISH_BRANCH=csprng_%PACKAGE%_%DESIRED_PYTHON% -) else ( - set PUBLISH_BRANCH=csprng_%PACKAGE%_%DESIRED_PYTHON%%PACKAGE_SUFFIX% -) - -git clone %ARTIFACT_REPO_URL% -b %PUBLISH_BRANCH% --single-branch >nul 2>&1 - -IF ERRORLEVEL 1 ( - echo Branch %PUBLISH_BRANCH% not exist, falling back to master - set NO_BRANCH=1 - git clone %ARTIFACT_REPO_URL% -b master --single-branch >nul 2>&1 -) - -IF ERRORLEVEL 1 ( - echo Clone failed - goto err -) - -cd pytorch_builder -attrib -s -h -r . /s /d - -:: Empty repo -rd /s /q . || ver >nul - -IF NOT EXIST %PACKAGE% mkdir %PACKAGE% - -xcopy /S /E /Y ..\..\output\*.* %PACKAGE%\ - -git config --global user.name "Azure DevOps" -git config --global user.email peterghost86@gmail.com -git init -git checkout --orphan %PUBLISH_BRANCH% -git remote add origin %ARTIFACT_REPO_URL% -git add . 
-git commit -m "Update artifacts" - -:push - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -git push origin %PUBLISH_BRANCH% -f > nul 2>&1 - -IF ERRORLEVEL 1 ( - echo Git push retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Push failed - goto err - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto push -) ELSE ( - set RETRY_TIMES= - set SLEEP_TIME= -) - -popd - -exit /b 0 - -:err - -popd - -exit /b 1 diff --git a/packaging/windows/internal/setup.bat b/packaging/windows/internal/setup.bat deleted file mode 100644 index 96cb7fb..0000000 --- a/packaging/windows/internal/setup.bat +++ /dev/null @@ -1,44 +0,0 @@ -@echo off - -echo The flags after configuring: -echo NO_CUDA=%NO_CUDA% -echo CMAKE_GENERATOR=%CMAKE_GENERATOR% -if "%NO_CUDA%"=="" echo CUDA_PATH=%CUDA_PATH% -if NOT "%CC%"=="" echo CC=%CC% -if NOT "%CXX%"=="" echo CXX=%CXX% -if NOT "%DISTUTILS_USE_SDK%"=="" echo DISTUTILS_USE_SDK=%DISTUTILS_USE_SDK% - -set SRC_DIR=%~dp0\.. - -IF "%VSDEVCMD_ARGS%" == "" ( - call "%VS15VCVARSALL%" x64 -) ELSE ( - call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% -) - -pushd %SRC_DIR% - -IF NOT exist "setup.py" ( - cd %MODULE_NAME% -) - -if "%CXX%"=="sccache cl" ( - sccache --stop-server - sccache --start-server - sccache --zero-stats -) - -:pytorch -:: This stores in e.g. D:/_work/1/s/windows/output/cpu -pip wheel -e . --no-deps --wheel-dir ../output - -:build_end -IF ERRORLEVEL 1 exit /b 1 -IF NOT ERRORLEVEL 0 exit /b 1 - -if "%CXX%"=="sccache cl" ( - taskkill /im sccache.exe /f /t || ver > nul - taskkill /im nvcc.exe /f /t || ver > nul -) - -cd .. 
diff --git a/packaging/windows/internal/test.bat b/packaging/windows/internal/test.bat deleted file mode 100644 index 8e6878b..0000000 --- a/packaging/windows/internal/test.bat +++ /dev/null @@ -1,79 +0,0 @@ -@echo off - -set SRC_DIR=%~dp0\.. -pushd %SRC_DIR% - -set PYTHON_VERSION=%PYTHON_PREFIX:py=cp% - -if "%BUILD_CSPRNG%" == "" ( - pip install future pytest coverage hypothesis protobuf -) ELSE ( - pip install future pytest "pillow>=4.1.1" -) - -for /F "delims=" %%i in ('where /R %SRC_DIR%\output *%MODULE_NAME%*%PYTHON_VERSION%*.whl') do pip install "%%i" - -if ERRORLEVEL 1 exit /b 1 - -if NOT "%BUILD_CSPRNG%" == "" ( - echo Smoke testing imports - python -c "import torchcsprng" - if ERRORLEVEL 1 exit /b 1 - goto smoke_test_end -) - -echo Smoke testing imports -python -c "import torch" -if ERRORLEVEL 1 exit /b 1 - -python -c "from caffe2.python import core" -if ERRORLEVEL 1 exit /b 1 - -echo Checking that MKL is available -python -c "import torch; exit(0 if torch.backends.mkl.is_available() else 1)" -if ERRORLEVEL 1 exit /b 1 - -setlocal EnableDelayedExpansion -set NVIDIA_GPU_EXISTS=0 -for /F "delims=" %%i in ('wmic path win32_VideoController get name') do ( - set GPUS=%%i - if not "x!GPUS:NVIDIA=!" == "x!GPUS!" ( - SET NVIDIA_GPU_EXISTS=1 - goto gpu_check_end - ) -) -:gpu_check_end -endlocal & set NVIDIA_GPU_EXISTS=%NVIDIA_GPU_EXISTS% - -if NOT "%CUDA_PREFIX%" == "cpu" if "%NVIDIA_GPU_EXISTS%" == "1" ( - echo Checking that CUDA archs are setup correctly - python -c "import torch; torch.randn([3,5]).cuda()" - if ERRORLEVEL 1 exit /b 1 - - echo Checking that magma is available - python -c "import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)" - if ERRORLEVEL 1 exit /b 1 - - echo Checking that CuDNN is available - python -c "import torch; exit(0 if torch.backends.cudnn.is_available() else 1)" - if ERRORLEVEL 1 exit /b 1 -) -:smoke_test_end - -echo Not running unit tests. 
Hopefully these problems are caught by CI -goto test_end - -if "%BUILD_CSPRNG%" == "" ( - cd pytorch\test - python run_test.py -v -) else ( - cd csprng - pytest . -) - -if ERRORLEVEL 1 exit /b 1 - -:test_end - -popd -exit /b 0 diff --git a/packaging/windows/internal/upload.bat b/packaging/windows/internal/upload.bat deleted file mode 100644 index f78fe0b..0000000 --- a/packaging/windows/internal/upload.bat +++ /dev/null @@ -1,96 +0,0 @@ -@echo off - -IF "%CONDA_UPLOADER_INSTALLATION%" == "" goto precheck_fail -IF "%PYTORCH_FINAL_PACKAGE_DIR%" == "" goto precheck_fail -IF "%today%" == "" goto precheck_fail -IF "%PYTORCH_ANACONDA_USERNAME%" == "" goto precheck_fail -IF "%PYTORCH_ANACONDA_PASSWORD%" == "" goto precheck_fail - -goto precheck_pass - -:precheck_fail - -echo Please run nightly_defaults.bat first. -echo And remember to set `PYTORCH_FINAL_PACKAGE_DIR` -echo Finally, don't forget to set anaconda tokens -exit /b 1 - -:precheck_pass - -pushd %today% - -:: Install anaconda client -set "CONDA_HOME=%CONDA_UPLOADER_INSTALLATION%" -set "tmp_conda=%CONDA_HOME%" -set "miniconda_exe=%CD%\miniconda.exe" -rmdir /s /q "%CONDA_HOME%" -del miniconda.exe -curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" -popd - -IF ERRORLEVEL 1 ( - echo Conda download failed - exit /b 1 -) - -call %~dp0\..\..\conda\install_conda.bat - -IF ERRORLEVEL 1 ( - echo Conda installation failed - exit /b 1 -) - -set "ORIG_PATH=%PATH%" -set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" - -REM conda install -y anaconda-client -pip install git+https://github.com/peterjc123/anaconda-client.git@log_more_meaningfull_errors -IF ERRORLEVEL 1 ( - echo Anaconda client installation failed - exit /b 1 -) - -set PYTORCH_FINAL_PACKAGE= -:: Upload all the packages under `PYTORCH_FINAL_PACKAGE_DIR` -FOR /F "delims=" %%i IN ('where /R %PYTORCH_FINAL_PACKAGE_DIR% *csprng*.tar.bz2') DO ( - set "PYTORCH_FINAL_PACKAGE=%%i" -) - -IF 
"%PYTORCH_FINAL_PACKAGE%" == "" ( - echo No package to upload - exit /b 0 -) - -:upload - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -REM bash -c "yes | anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%"" -anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%" -IF ERRORLEVEL 1 ( - echo Anaconda client login failed - exit /b 1 -) - -echo Uploading %PYTORCH_FINAL_PACKAGE% to Anaconda Cloud -anaconda upload "%PYTORCH_FINAL_PACKAGE%" -u pytorch-nightly --label main --force --no-progress - -IF ERRORLEVEL 1 ( - echo Anaconda upload retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Upload failed - exit /b 1 - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto upload -) ELSE ( - set RETRY_TIMES= - set SLEEP_TIME= -) diff --git a/packaging/windows/internal/vc_env_helper.bat b/packaging/windows/internal/vc_env_helper.bat deleted file mode 100644 index e85a372..0000000 --- a/packaging/windows/internal/vc_env_helper.bat +++ /dev/null @@ -1,43 +0,0 @@ -@echo on - -set VC_VERSION_LOWER=16 -set VC_VERSION_UPPER=17 -if "%VC_YEAR%" == "2017" ( - set VC_VERSION_LOWER=15 - set VC_VERSION_UPPER=16 -) - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere -if "%VSDEVCMD_ARGS%" == "" ( - call "%VS15VCVARSALL%" x64 || exit /b 1 -) else ( - call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 -) - -@echo on - -set DISTUTILS_USE_SDK=1 - -set args=%1 -shift 
-:start -if [%1] == [] goto done -set args=%args% %1 -shift -goto start - -:done -if "%args%" == "" ( - echo Usage: vc_env_helper.bat [command] [args] - echo e.g. vc_env_helper.bat cl /c test.cpp -) - -%args% || exit /b 1 diff --git a/packaging/windows/internal/vc_install_helper.sh b/packaging/windows/internal/vc_install_helper.sh deleted file mode 100644 index cdae180..0000000 --- a/packaging/windows/internal/vc_install_helper.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -set -ex - -if [[ "$CU_VERSION" == "cu92" ]]; then - export VC_YEAR=2017 - export VSDEVCMD_ARGS="-vcvars_ver=14.13" - powershell packaging/windows/internal/vs2017_install.ps1 -elif [[ "$CU_VERSION" == "cu100" ]]; then - export VC_YEAR=2017 - export VSDEVCMD_ARGS="" - powershell packaging/windows/internal/vs2017_install.ps1 -else - export VC_YEAR=2019 - export VSDEVCMD_ARGS="" -fi diff --git a/packaging/windows/internal/vs2017_install.ps1 b/packaging/windows/internal/vs2017_install.ps1 deleted file mode 100644 index 3e953de..0000000 --- a/packaging/windows/internal/vs2017_install.ps1 +++ /dev/null @@ -1,25 +0,0 @@ -$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe" -$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", - "--add Microsoft.VisualStudio.Component.VC.Tools.14.13", - "--add Microsoft.Component.MSBuild", - "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", - "--add Microsoft.VisualStudio.Component.TextTemplating", - "--add Microsoft.VisualStudio.Component.VC.CoreIde", - "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest", - "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core", - "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64", - "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81") - -curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe -if ($LASTEXITCODE -ne 0) { - echo "Download of the VS 2017 installer failed" - exit 1 -} - -$process = Start-Process 
"${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru -Remove-Item -Path vs_installer.exe -Force -$exitCode = $process.ExitCode -if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { - echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]." - exit 1 -} diff --git a/packaging/windows/internal/vs2019_install.ps1 b/packaging/windows/internal/vs2019_install.ps1 deleted file mode 100644 index e436051..0000000 --- a/packaging/windows/internal/vs2019_install.ps1 +++ /dev/null @@ -1,21 +0,0 @@ -$VS_DOWNLOAD_LINK = "https://aka.ms/vs/16/release/vs_buildtools.exe" -$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", - "--add Microsoft.Component.MSBuild", - "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", - "--add Microsoft.VisualStudio.Component.VC.CoreBuildTools", - "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest", - "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64") - -curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe -if ($LASTEXITCODE -ne 0) { - echo "Download of the VS 2019 installer failed" - exit 1 -} - -$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru -Remove-Item -Path vs_installer.exe -Force -$exitCode = $process.ExitCode -if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { - echo "VS 2019 installer exited with code $exitCode, which should be one of [0, 3010]." 
- exit 1 -} diff --git a/packaging/windows/internal/vs_install.bat b/packaging/windows/internal/vs_install.bat deleted file mode 100644 index 348a5e3..0000000 --- a/packaging/windows/internal/vs_install.bat +++ /dev/null @@ -1,14 +0,0 @@ -@echo off - -set VS_DOWNLOAD_LINK=https://aka.ms/vs/15/release/vs_enterprise.exe -set VS_INSTALL_PATH=C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise -set VS_INSTALL_ARGS=--nocache --quiet --wait --add Microsoft.VisualStudio.Component.VC.Tools.14.11 -set VSDEVCMD_ARGS=-vcvars_ver=14.11 - -curl -k -L %VS_DOWNLOAD_LINK% --output vs_installer.exe -if errorlevel 1 exit /b 1 - -start /wait vs_installer.exe modify --installPath "%VS_INSTALL_PATH%" %VS_INSTALL_ARGS% -if not errorlevel 0 exit /b 1 -if errorlevel 1 if not errorlevel 3010 exit /b 1 -if errorlevel 3011 exit /b 1 diff --git a/packaging/windows/old/cuda100.bat b/packaging/windows/old/cuda100.bat deleted file mode 100644 index f088bca..0000000 --- a/packaging/windows/old/cuda100.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. 
- IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V10_0%"=="" ( - echo CUDA 10.0 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_CSPRNG%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V10_0%" - set "PATH=%CUDA_PATH_V10_0%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/old/cuda90.bat b/packaging/windows/old/cuda90.bat deleted file mode 100644 index 520b794..0000000 --- a/packaging/windows/old/cuda90.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. 
- IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V9_0%"=="" ( - echo CUDA 9 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_CSPRNG%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;7.0 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V9_0%" - set "PATH=%CUDA_PATH_V9_0%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/templates/auth_task.yml b/packaging/windows/templates/auth_task.yml deleted file mode 100644 index 7554ffa..0000000 --- a/packaging/windows/templates/auth_task.yml +++ /dev/null @@ -1,17 +0,0 @@ -jobs: -- job: 'VSTS_Auth_Task' - timeoutInMinutes: 5 - cancelTimeoutInMinutes: 5 - variables: - - group: 'peterjc-vsts-token' - - pool: - vmImage: 'vs2017-win2016' - - steps: - - checkout: self - clean: true - - - template: vsts_auth.yml - parameters: - auth: $(vsts_auth) diff --git a/packaging/windows/templates/build_conda.yml b/packaging/windows/templates/build_conda.yml deleted file mode 100644 index ce29c06..0000000 --- a/packaging/windows/templates/build_conda.yml +++ /dev/null @@ -1,15 +0,0 @@ -parameters: - msagent: false - -steps: -- bash: 'find . 
-name "*.sh" -exec dos2unix {} +' - displayName: Replace file endings - -- script: 'if not exist %PYTORCH_FINAL_PACKAGE_DIR% mkdir %PYTORCH_FINAL_PACKAGE_DIR%' - displayName: 'Create final package directory' - -- bash: './packaging/conda/build_csprng.sh $CUDA_VERSION $TORCHCSPRNG_BUILD_VERSION $TORCHCSPRNG_BUILD_NUMBER' - displayName: Build - env: - ${{ if eq(parameters.msagent, 'true') }}: - MAX_JOBS: 2 diff --git a/packaging/windows/templates/build_task.yml b/packaging/windows/templates/build_task.yml deleted file mode 100644 index 18d4f8e..0000000 --- a/packaging/windows/templates/build_task.yml +++ /dev/null @@ -1,173 +0,0 @@ -parameters: - package: '' - spec: '' - jobDesc: '' - packageDesc: '' - msagent: true - cpuEnabled: true - cudaEnabled: true - condaEnabled: true - wheelsEnabled: true - override: false - -jobs: -- job: 'Windows_${{ parameters.spec }}_${{ parameters.package }}_Build' - timeoutInMinutes: 60 - cancelTimeoutInMinutes: 5 - condition: > - or(and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CPU'), - eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CPU'), - eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CUDA'), - eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CUDA'), - eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true'))) - variables: - - ${{ if eq(parameters.override, 'true') }}: - - name: TORCHCSPRNG_BUILD_NUMBER - value: 1 - - name: PYTORCH_REPO - value: 'pytorch' - - name: PYTORCH_BRANCH - value: 'v0.4.0' - - ${{ if eq(parameters.msagent, 'true') }}: - - name: USE_SCCACHE - value: 0 - - ${{ if eq(parameters.msagent, 
'false') }}: - - name: USE_SCCACHE - value: 1 - - ${{ if eq(parameters.package, 'Conda') }}: - - group: peterjc_anaconda_token - - name: PYTORCH_FINAL_PACKAGE_DIR - value: '$(Build.Repository.LocalPath)\packaging\windows\output' - - strategy: - maxParallel: 10 - matrix: - ${{ if eq(parameters.spec, 'CPU') }}: - PY3.5: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: cpu - PY3.6: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: cpu - PY3.7: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: cpu - PY3.8: - DESIRED_PYTHON: 3.8 - CUDA_VERSION: cpu - PY3.9: - DESIRED_PYTHON: 3.9 - CUDA_VERSION: cpu - ${{ if ne(parameters.spec, 'CPU') }}: - PY3.5_92: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: 92 - PY3.6_92: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: 92 - PY3.7_92: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: 92 - PY3.8_92: - DESIRED_PYTHON: 3.8 - CUDA_VERSION: 92 - PY3.9_92: - DESIRED_PYTHON: 3.9 - CUDA_VERSION: 92 - PY3.5_101: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: 101 - PY3.6_101: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: 101 - PY3.7_101: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: 101 - PY3.8_101: - DESIRED_PYTHON: 3.8 - CUDA_VERSION: 101 - PY3.9_101: - DESIRED_PYTHON: 3.9 - CUDA_VERSION: 101 - PY3.5_102: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: 102 - PY3.6_102: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: 102 - PY3.7_102: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: 102 - PY3.8_102: - DESIRED_PYTHON: 3.8 - CUDA_VERSION: 102 - PY3.9_102: - DESIRED_PYTHON: 3.9 - CUDA_VERSION: 102 - - pool: - ${{ if eq(parameters.msagent, 'true') }}: - vmImage: 'vs2017-win2016' - ${{ if eq(parameters.msagent, 'false') }}: - name: 'release' - - steps: - - checkout: self - clean: true - - - template: setup_env_for_msagent.yml - parameters: - msagent: ${{ parameters.msagent }} - - # - ${{ if and(eq(parameters.override, 'true'), eq(parameters.package, 'Wheels')) }}: - # - template: override_pytorch_version.yml - - - template: setup_nightly_variables.yml - parameters: - package: ${{ parameters.package }} - - - ${{ if eq(parameters.package, 'Wheels') }}: - - template: 
build_wheels.yml - parameters: - msagent: ${{ parameters.msagent }} - - - ${{ if eq(parameters.package, 'Conda') }}: - - template: build_conda.yml - parameters: - msagent: ${{ parameters.msagent }} - - - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}: - - template: publish_test_results.yml - parameters: - msagent: ${{ parameters.msagent }} - - # If you want to upload binaries to S3 & Anaconda Cloud, please uncomment this section. - - ${{ if and(eq(parameters.package, 'Wheels'), eq(parameters.spec, 'CPU')) }}: - - template: upload_to_s3.yml - parameters: - cuVer: '$(CUDA_VERSION)' - cudaVer: '$(CUDA_VERSION)' - - - ${{ if and(eq(parameters.package, 'Wheels'), ne(parameters.spec, 'CPU')) }}: - - template: upload_to_s3.yml - parameters: - cuVer: 'cu$(CUDA_VERSION)' - cudaVer: 'cuda$(CUDA_VERSION)' - - - ${{ if eq(parameters.package, 'Conda') }}: - - template: upload_to_conda.yml - parameters: - user: $(peterjc_conda_username) - pass: $(peterjc_conda_password) - - # If you want to upload binaries to Azure Git, please uncomment this section. 
- # - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}: - # - template: publish_test_results.yml - # parameters: - # msagent: ${{ parameters.msagent }} - # - template: publish_packages.yml - # parameters: - # package: ${{ parameters.package }} diff --git a/packaging/windows/templates/build_wheels.yml b/packaging/windows/templates/build_wheels.yml deleted file mode 100644 index 8393fdb..0000000 --- a/packaging/windows/templates/build_wheels.yml +++ /dev/null @@ -1,9 +0,0 @@ -parameters: - msagent: false - -steps: -- script: 'call packaging/windows/build_csprng.bat %CUDA_VERSION% %TORCHCSPRNG_BUILD_VERSION% %TORCHCSPRNG_BUILD_NUMBER%' - displayName: Build - env: - ${{ if eq(parameters.msagent, 'true') }}: - MAX_JOBS: 2 diff --git a/packaging/windows/templates/linux_build_task.yml b/packaging/windows/templates/linux_build_task.yml deleted file mode 100644 index 0b32892..0000000 --- a/packaging/windows/templates/linux_build_task.yml +++ /dev/null @@ -1,38 +0,0 @@ -parameters: - msagent: true - enabled: false - -jobs: -- job: 'Linux_CPU_Conda_Build' - timeoutInMinutes: 0 - cancelTimeoutInMinutes: 5 - condition: ${{ eq(parameters.enabled, 'true') }} - variables: - CUDA_VERSION: cpu - TORCH_CONDA_BUILD_FOLDER: pytorch-nightly - PYTORCH_FINAL_PACKAGE_DIR: '$(Build.Repository.LocalPath)/output' - - strategy: - maxParallel: 10 - matrix: - PY3.5: - DESIRED_PYTHON: 3.5 - - pool: - vmImage: 'ubuntu-16.04' - - steps: - - checkout: self - clean: true - - - script: 'sudo apt-get install p7zip-full' - displayName: 'Install 7Zip' - - - task: CondaEnvironment@1 - displayName: 'Install conda-build' - inputs: - packageSpecs: 'conda-build' - - - template: build_conda.yml - parameters: - msagent: ${{ parameters.msagent }} diff --git a/packaging/windows/templates/override_pytorch_version.yml b/packaging/windows/templates/override_pytorch_version.yml deleted file mode 100644 index 8af93ae..0000000 --- a/packaging/windows/templates/override_pytorch_version.yml 
+++ /dev/null @@ -1,6 +0,0 @@ -steps: -- script: 'windows/internal/override_pytorch_version.bat' - displayName: 'Override PyTorch Build Version for Wheels' - -- script: 'echo $(PYTORCH_BUILD_VERSION)' - displayName: 'Show PyTorch Build Version' diff --git a/packaging/windows/templates/publish_packages.yml b/packaging/windows/templates/publish_packages.yml deleted file mode 100644 index 51ce824..0000000 --- a/packaging/windows/templates/publish_packages.yml +++ /dev/null @@ -1,8 +0,0 @@ -parameters: - package: '' - -steps: -- script: 'packaging/windows/internal/publish.bat' - displayName: 'Upload packages to Azure DevOps Repo' - env: - PACKAGEFULLNAME: ${{ parameters.package }} diff --git a/packaging/windows/templates/publish_test_results.yml b/packaging/windows/templates/publish_test_results.yml deleted file mode 100644 index 1e0dc02..0000000 --- a/packaging/windows/templates/publish_test_results.yml +++ /dev/null @@ -1,6 +0,0 @@ -steps: -- task: PublishTestResults@2 # No test results to publish - inputs: - testResultsFiles: 'windows/pytorch/test/**/*.xml' - testRunTitle: 'Publish test results' - enabled: false diff --git a/packaging/windows/templates/setup_env_for_msagent.yml b/packaging/windows/templates/setup_env_for_msagent.yml deleted file mode 100644 index 377734f..0000000 --- a/packaging/windows/templates/setup_env_for_msagent.yml +++ /dev/null @@ -1,25 +0,0 @@ -parameters: - msagent: false - -steps: -- ${{ if eq(parameters.msagent, 'true') }}: - - task: BatchScript@1 - displayName: 'Install 7Zip & cURL' - inputs: - filename: 'packaging/windows/internal/dep_install.bat' - - modifyEnvironment: true - - - task: BatchScript@1 - displayName: 'Install Visual Studio 2017' - inputs: - filename: 'packaging/windows/internal/vs_install.bat' - - modifyEnvironment: true - - - task: BatchScript@1 - displayName: 'Install CUDA' - inputs: - filename: 'packaging/windows/internal/cuda_install.bat' - - modifyEnvironment: true diff --git 
a/packaging/windows/templates/setup_nightly_variables.yml b/packaging/windows/templates/setup_nightly_variables.yml deleted file mode 100644 index 94b2fe9..0000000 --- a/packaging/windows/templates/setup_nightly_variables.yml +++ /dev/null @@ -1,11 +0,0 @@ -parameters: - package: '' - -steps: -- task: BatchScript@1 - displayName: 'Setup nightly variables' - inputs: - filename: 'packaging/windows/internal/nightly_defaults.bat' - arguments: ${{ parameters.package }} - - modifyEnvironment: true diff --git a/packaging/windows/templates/upload_to_conda.yml b/packaging/windows/templates/upload_to_conda.yml deleted file mode 100644 index dc172bc..0000000 --- a/packaging/windows/templates/upload_to_conda.yml +++ /dev/null @@ -1,10 +0,0 @@ -parameters: - user: '' - pass: '' - -steps: -- script: 'call packaging/windows/internal/upload.bat' - displayName: 'Upload packages to Anaconda Cloud' - env: - PYTORCH_ANACONDA_USERNAME: ${{ parameters.user }} - PYTORCH_ANACONDA_PASSWORD: ${{ parameters.pass }} diff --git a/packaging/windows/templates/upload_to_s3.yml b/packaging/windows/templates/upload_to_s3.yml deleted file mode 100644 index 1de91b5..0000000 --- a/packaging/windows/templates/upload_to_s3.yml +++ /dev/null @@ -1,15 +0,0 @@ -parameters: - cuVer: '' - cudaVer: '' - -steps: -- task: AmazonWebServices.aws-vsts-tools.S3Upload.S3Upload@1 - displayName: 'Upload ${{ parameters.cuVer }} wheel to S3' - inputs: - awsCredentials: 'Pytorch S3 bucket' - bucketName: 'pytorch' - sourceFolder: 'packaging/windows/output' - globExpressions: '*.whl' - targetFolder: 'whl/nightly/${{ parameters.cuVer }}/' - filesAcl: 'public-read' - flattenFolders: 'true' diff --git a/packaging/windows/templates/vsts_auth.yml b/packaging/windows/templates/vsts_auth.yml deleted file mode 100644 index fde767d..0000000 --- a/packaging/windows/templates/vsts_auth.yml +++ /dev/null @@ -1,8 +0,0 @@ -parameters: - auth: '' - -steps: -- script: 'call packaging/windows/internal/auth.bat' - displayName: 'Sign in to 
Azure Pipelines' - env: - VSTS_AUTH: ${{ parameters.auth }} diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index b8c6945..0000000 --- a/test/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/test/test_csprng.py b/test/test_csprng.py deleted file mode 100644 index a85b7b8..0000000 --- a/test/test_csprng.py +++ /dev/null @@ -1,654 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -import os -import random -import time -import unittest - -import numpy as np -import torch -from Crypto.Cipher import AES -from Crypto.Util import Counter -from scipy import stats - -try: - import torchcsprng as csprng -except ImportError: - raise RuntimeError("CSPRNG not available") - -IS_SANDCASTLE = ( - os.getenv("SANDCASTLE") == "1" or os.getenv("TW_JOB_USER") == "sandcastle" -) -IS_FBCODE = os.getenv("PYTORCH_TEST_FBCODE") == "1" - - -def to_numpy(t, dtype=torch.float): - if t.dtype == torch.bfloat16: - t = t.to(dtype) - return t.numpy() - - -def to_bytes(t): - if t.dtype == torch.bfloat16: - t = t.view(torch.int16) - return t.cpu().numpy().view(np.int8) - - -class TestCSPRNG(unittest.TestCase): - - all_generators = [ - csprng.create_random_device_generator(), - csprng.create_random_device_generator("/dev/urandom"), - csprng.create_mt19937_generator(), - csprng.create_mt19937_generator(42), - ] - - int_dtypes = [torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64] - - standard_fp_dtypes = [torch.float, torch.double] - - non_standard_fp_dtypes = [torch.half, torch.bfloat16] - - fp_dtypes = standard_fp_dtypes + non_standard_fp_dtypes - - num_dtypes = 
int_dtypes + fp_dtypes - - all_dtypes = num_dtypes + [torch.bool] - - size = 1000 - - all_devices = ( - ["cpu", "cuda"] - if (torch.cuda.is_available() and csprng.supports_cuda()) - else ["cpu"] - ) - - def test_random_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.num_dtypes: - if dtype == torch.float: - to_inc = 2**24 - elif dtype == torch.double: - to_inc = 2**53 - elif dtype == torch.half: - to_inc = 2**11 - elif dtype == torch.bfloat16: - to_inc = 2**8 - else: - to_inc = torch.iinfo(dtype).max - - t = torch.empty(self.size, dtype=dtype, device=device).random_( - generator=gen - ) - res = stats.kstest( - to_numpy(t.cpu()), stats.randint.cdf, args=(0, to_inc) - ) - self.assertTrue(res.statistic < 0.1) - - no_cuda = not torch.cuda.is_available() or not csprng.supports_cuda() - - no_cuda_message = ( - "CUDA is not available or csprng was not compiled with CUDA support" - ) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_random_cpu_vs_cuda(self): - for dtype in self.num_dtypes: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").random_( - generator=gen - ) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device="cuda").random_( - generator=gen - ) - self.assertTrue((cpu_t == cuda_t.cpu()).all()) - - def test_random_to_kstest(self): - to_ = 42 - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.num_dtypes: - t = torch.zeros(self.size, dtype=dtype, device=device).random_( - to_, generator=gen - ) - res = stats.kstest( - to_numpy(t.cpu()), stats.randint.cdf, args=(0, to_) - ) - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_random_to_cpu_vs_cuda(self): - to_ = 42 - for dtype in self.num_dtypes: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.zeros(self.size, dtype=dtype, device="cpu").random_( - to_, generator=gen - 
) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.zeros(self.size, dtype=dtype, device="cuda").random_( - to_, generator=gen - ) - self.assertTrue((cpu_t == cuda_t.cpu()).all()) - - def test_random_from_to_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.num_dtypes: - for from_ in [0, 24, 42]: - for to_ in [42, 99, 123]: - if from_ < to_: - t = torch.zeros( - self.size, dtype=dtype, device=device - ).random_(from_, to_, generator=gen) - res = stats.kstest( - to_numpy(t.cpu()), - stats.randint.cdf, - args=(from_, to_), - ) - self.assertTrue(res.statistic < 0.2) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_random_from_to_cpu_vs_cuda(self): - for dtype in self.num_dtypes: - for from_ in [0, 24, 42]: - for to_ in [42, 99, 123]: - if from_ < to_: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.zeros( - self.size, dtype=dtype, device="cpu" - ).random_(from_, to_, generator=gen) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.zeros( - self.size, dtype=dtype, device="cuda" - ).random_(from_, to_, generator=gen) - self.assertTrue((cpu_t == cuda_t.cpu()).all()) - - def test_random_bool(self): - for device in self.all_devices: - for gen in self.all_generators: - t = torch.empty(self.size, dtype=torch.bool, device=device) - - t.fill_(False) - t.random_(generator=gen) - self.assertEqual(t.min(), False) - self.assertEqual(t.max(), True) - self.assertTrue( - 0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6 - ) - - t.fill_(True) - t.random_(generator=gen) - self.assertEqual(t.min(), False) - self.assertEqual(t.max(), True) - self.assertTrue( - 0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6 - ) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_random_bool_cpu_vs_cuda(self): - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=torch.bool, device="cpu").random_( - generator=gen - ) - gen = 
csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=torch.bool, device="cuda").random_( - generator=gen - ) - self.assertTrue((cpu_t == cuda_t.cpu()).all()) - - def test_uniform_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for from_ in [-42, 0, 4.2]: - for to_ in [-4.2, 0, 42]: - if to_ > from_: - t = torch.empty( - self.size, dtype=dtype, device=device - ).uniform_(from_, to_, generator=gen) - res = stats.kstest( - to_numpy(t.cpu(), torch.double), - "uniform", - args=(from_, (to_ - from_)), - ) - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_uniform_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - for from_ in [-42, 0, 4.2]: - for to_ in [-4.2, 0, 42]: - if to_ > from_: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty( - self.size, dtype=dtype, device="cpu" - ).uniform_(from_, to_, generator=gen) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty( - self.size, dtype=dtype, device="cuda" - ).uniform_(from_, to_, generator=gen) - self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) - - def test_normal_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for mean in [-3, 0, 7]: - for std in [1, 5, 7]: - t = torch.empty( - self.size, dtype=dtype, device=device - ).normal_(mean=mean, std=std, generator=gen) - res = stats.kstest( - to_numpy(t.cpu(), torch.double), - "norm", - args=(mean, std), - ) - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_normal_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - for mean in [-3, 0, 7]: - for std in [1, 5, 7]: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").normal_( - mean=mean, std=std, generator=gen - ) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, 
dtype=dtype, device="cuda").normal_( - mean=mean, std=std, generator=gen - ) - self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) - - def test_log_normal_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for mean in [-3, 0, 7]: - for std in [1, 5, 7]: - t = torch.empty( - self.size, dtype=dtype, device=device - ).log_normal_(mean=mean, std=std, generator=gen) - res = stats.kstest( - to_numpy(t.cpu(), torch.double), - "lognorm", - args=(std, 0, math.exp(mean)), - ) - if dtype in [torch.half, torch.bfloat16]: - self.assertTrue(res.statistic < 0.4) - else: - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_log_normal_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - for mean in [-3, 0, 7]: - for std in [1, 5, 7]: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty( - self.size, dtype=dtype, device="cpu" - ).log_normal_(mean=mean, std=std, generator=gen) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty( - self.size, dtype=dtype, device="cuda" - ).log_normal_(mean=mean, std=std, generator=gen) - self.assertTrue( - torch.allclose(cpu_t, cuda_t.cpu(), 1e-4, equal_nan=True) - ) - - def test_exponential_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for lambd in [0.5, 1.0, 5.0]: - t = torch.empty( - self.size, dtype=dtype, device=device - ).exponential_(lambd=lambd, generator=gen) - res = stats.kstest( - to_numpy(t.cpu(), torch.double), - "expon", - args=( - 0, - 1 / lambd, - ), - ) - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - @unittest.skip("https://github.com/pytorch/pytorch/issues/38662") - def test_exponential_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - for lambd in [0.5, 1.0, 5.0]: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").exponential_( - 
lambd=lambd, generator=gen - ) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty( - self.size, dtype=dtype, device="cuda" - ).exponential_(lambd=lambd, generator=gen) - self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) - - def test_cauchy_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for median in [-10, 0, 50]: - for sigma in [0.5, 1.0, 10.0]: - t = torch.empty( - self.size, dtype=dtype, device=device - ).cauchy_(median=median, sigma=sigma, generator=gen) - res = stats.kstest( - to_numpy(t.cpu(), torch.double), - "cauchy", - args=(median, sigma), - ) - if dtype in [torch.half, torch.bfloat16]: - self.assertTrue(res.statistic < 0.4) - else: - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_cauchy_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - for median in [-10, 0, 50]: - for sigma in [0.5, 1.0, 10.0]: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").cauchy_( - median=median, sigma=sigma, generator=gen - ) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device="cuda").cauchy_( - median=median, sigma=sigma, generator=gen - ) - self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) - - def test_geometric(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for p in [0.2, 0.5, 0.8]: - t = torch.empty( - self.size, dtype=dtype, device=device - ).geometric_(p=p, generator=gen) - # actual = np.histogram(t.cpu().to(torch.double), np.arange(1, 100))[0] - # expected = stats.geom(p).pmf(np.arange(1, 99)) * self.size - # res = stats.chisquare(actual, expected) - # self.assertAlmostEqual(res.pvalue, 1.0, delta=0.5) TODO https://github.com/pytorch/csprng/issues/7 - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_geometric_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - 
for p in [0.2, 0.5, 0.8]: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").geometric_( - p=p, generator=gen - ) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device="cuda").geometric_( - p=p, generator=gen - ) - self.assertTrue( - torch.allclose(cpu_t, cuda_t.cpu(), 1e-9, equal_nan=True) - ) - - def test_non_contiguous_vs_contiguous(self): - size = 10 - for device in self.all_devices: - for dtype in self.all_dtypes: - for i in range(10): - t = torch.zeros([size, size, size], dtype=dtype, device=device) - x1 = random.randrange(0, size) - y1 = random.randrange(0, size) - z1 = random.randrange(0, size) - x2 = random.randrange(x1 + 1, max(x1 + 2, size)) - y2 = random.randrange(y1 + 1, max(y1 + 2, size)) - z2 = random.randrange(z1 + 1, max(z1 + 2, size)) - maybe_non_contiguous = t[x1:x2, y1:y2, z1:z2] - assert maybe_non_contiguous.numel() > 0 - - if not maybe_non_contiguous.is_contiguous(): - seed = random.randrange(1000) - - non_contiguous = maybe_non_contiguous - gen = csprng.create_mt19937_generator(seed) - non_contiguous.random_(generator=gen) - - contiguous = torch.zeros_like(non_contiguous) - gen = csprng.create_mt19937_generator(seed) - contiguous.random_(generator=gen) - - assert contiguous.is_contiguous() - self.assertTrue((non_contiguous == contiguous).all()) - - for x in range(0, size): - for y in range(0, size): - for z in range(0, size): - if ( - not x1 <= x < x2 - and not y1 <= y < y2 - and not z1 <= z < z2 - ): - self.assertTrue(t[x, y, z] == 0) - - @unittest.skipIf(IS_SANDCASTLE or IS_FBCODE, "Does not work on Sandcastle") - @unittest.skipIf(torch.get_num_threads() < 2, "requires multithreading CPU") - def test_cpu_parallel(self): - urandom_gen = csprng.create_random_device_generator("/dev/urandom") - - def measure(size): - t = torch.empty(size, dtype=torch.float32, device="cpu") - start = time.time() - for i in range(20): - 
t.normal_(generator=urandom_gen) - finish = time.time() - return finish - start - - time_for_1K = measure(1000) - time_for_1M = measure(1000000) - # Pessimistic check that parallel execution gives >= 1.5 performance boost - self.assertTrue(time_for_1M / time_for_1K < 1000 / 1.5) - - @unittest.skipIf(IS_SANDCASTLE or IS_FBCODE, "Does not work on Sandcastle") - def test_version(self): - self.assertTrue(csprng.__version__) - self.assertTrue(csprng.git_version) - - def test_randperm(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.int_dtypes: - for size in range(0, 20): - expected = torch.arange(size, dtype=dtype, device=device) - - actual = torch.randperm( - size, dtype=dtype, device=device, generator=gen - ) - - actual_out = torch.empty(1, dtype=dtype, device=device) - torch.randperm(size, out=actual_out, generator=gen) - - if size >= 10: - self.assertTrue(not torch.allclose(expected, actual)) - self.assertTrue(not torch.allclose(expected, actual_out)) - - actual = actual.sort()[0] - actual_out = actual.sort()[0] - - self.assertTrue(torch.allclose(expected, actual)) - self.assertTrue(torch.allclose(expected, actual_out)) - - def test_encrypt_decrypt(self): - key_size_bytes = 16 - block_size_bytes = 16 - - def sizeof(dtype): - if dtype == torch.bool: - return 1 - elif dtype.is_floating_point: - return torch.finfo(dtype).bits // 8 - else: - return torch.iinfo(dtype).bits // 8 - - def pad(data, pad_size): - if len(data) % pad_size == 0: - return data - length = pad_size - (len(data) % pad_size) - return data + bytes([0]) * length - - def create_aes(m, k): - if m == "ecb": - return AES.new(k.tobytes(), AES.MODE_ECB) - elif m == "ctr": - ctr = Counter.new( - AES.block_size * 8, initial_value=0, little_endian=True - ) - return AES.new(k.tobytes(), AES.MODE_CTR, counter=ctr) - else: - return None - - for key_dtype in self.all_dtypes: - key_size = key_size_bytes // sizeof(key_dtype) - key = torch.empty(key_size, 
dtype=key_dtype).random_() - key_np = to_bytes(key) - for initial_dtype in self.all_dtypes: - for initial_size in [0, 4, 8, 15, 16, 23, 42]: - initial = torch.empty(initial_size, dtype=initial_dtype).random_() - initial_np = to_bytes(initial) - initial_size_bytes = initial_size * sizeof(initial_dtype) - for encrypted_dtype in self.all_dtypes: - encrypted_size = ( - (initial_size_bytes + block_size_bytes - 1) - // block_size_bytes - * block_size_bytes - // sizeof(encrypted_dtype) - ) - encrypted = torch.zeros(encrypted_size, dtype=encrypted_dtype) - for decrypted_dtype in self.all_dtypes: - decrypted_size = ( - initial_size_bytes + sizeof(decrypted_dtype) - 1 - ) // sizeof(decrypted_dtype) - decrypted = torch.zeros( - decrypted_size, dtype=decrypted_dtype - ) - for mode in ["ecb", "ctr"]: - for device in self.all_devices: - key = key.to(device) - initial = initial.to(device) - encrypted = encrypted.to(device) - decrypted = decrypted.to(device) - - csprng.encrypt( - initial, encrypted, key, "aes128", mode - ) - encrypted_np = to_bytes(encrypted) - - aes = create_aes(mode, key_np) - - encrypted_expected = np.frombuffer( - aes.encrypt( - pad(initial_np.tobytes(), block_size_bytes) - ), - dtype=np.int8, - ) - self.assertTrue( - np.array_equal(encrypted_np, encrypted_expected) - ) - - csprng.decrypt( - encrypted, decrypted, key, "aes128", mode - ) - decrypted_np = to_bytes(decrypted)[ - :initial_size_bytes - ] - - aes = create_aes(mode, key_np) - - decrypted_expected = np.frombuffer( - aes.decrypt( - pad( - encrypted_np.tobytes(), block_size_bytes - ) - ), - dtype=np.int8, - )[:initial_size_bytes] - self.assertTrue( - np.array_equal(decrypted_np, decrypted_expected) - ) - - self.assertTrue( - np.array_equal(initial_np, decrypted_np) - ) - - def test_encrypt_decrypt_inplace(self): - key_size_bytes = 16 - - def sizeof(dtype): - if dtype == torch.bool: - return 1 - elif dtype.is_floating_point: - return torch.finfo(dtype).bits // 8 - else: - return torch.iinfo(dtype).bits 
// 8 - - def create_aes(m, k): - if m == "ecb": - return AES.new(k.tobytes(), AES.MODE_ECB) - elif m == "ctr": - ctr = Counter.new( - AES.block_size * 8, initial_value=0, little_endian=True - ) - return AES.new(k.tobytes(), AES.MODE_CTR, counter=ctr) - else: - return None - - for key_dtype in self.all_dtypes: - key_size = key_size_bytes // sizeof(key_dtype) - key = torch.empty(key_size, dtype=key_dtype).random_() - key_np = to_bytes(key) - for initial_dtype in self.all_dtypes: - for initial_size_bytes in [0, 16, 256]: - initial_size = initial_size_bytes // sizeof(initial_dtype) - initial = torch.empty(initial_size, dtype=initial_dtype).random_() - initial_np = to_bytes(initial) - initial_np_copy = np.copy(initial_np) - for mode in ["ecb", "ctr"]: - for device in self.all_devices: - key = key.to(device) - initial = initial.to(device) - - csprng.encrypt(initial, initial, key, "aes128", mode) - encrypted_np = to_bytes(initial) - aes = create_aes(mode, key_np) - encrypted_expected = np.frombuffer( - aes.encrypt(initial_np_copy.tobytes()), dtype=np.int8 - ) - self.assertTrue( - np.array_equal(encrypted_np, encrypted_expected) - ) - - encrypted_np_copy = np.copy(encrypted_np) - - csprng.decrypt(initial, initial, key, "aes128", mode) - decrypted_np = to_bytes(initial) - aes = create_aes(mode, key_np) - decrypted_expected = np.frombuffer( - aes.decrypt(encrypted_np_copy.tobytes()), dtype=np.int8 - ) - self.assertTrue( - np.array_equal(decrypted_np, decrypted_expected) - ) - - self.assertTrue( - np.array_equal(initial_np_copy, decrypted_np) - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/torchcsprng/__init__.py b/torchcsprng/__init__.py deleted file mode 100644 index a05c967..0000000 --- a/torchcsprng/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -import torch - -from torchcsprng._C import * - - -try: - from .version import __version__, git_version # noqa: F401 -except ImportError: - pass diff --git a/torchcsprng/__init__.pyi b/torchcsprng/__init__.pyi deleted file mode 100644 index 236dbfe..0000000 --- a/torchcsprng/__init__.pyi +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from torch import Generator, Tensor - -def supports_cuda() -> bool: ... -def create_random_device_generator(token: str = "") -> Generator: ... -def create_mt19937_generator(seed: int = 0): ... -def encrypt(input: Tensor, output: Tensor, key: Tensor, cipher, mode): ... -def decrypt(input: Tensor, output: Tensor, key: Tensor, cipher, mode): ... -def __version__() -> str: ... -def git_version() -> str: ... From 5bf7869f45acecf1c79442168bd9c260a43a8d08 Mon Sep 17 00:00:00 2001 From: myl7 Date: Mon, 11 Dec 2023 14:51:35 +0800 Subject: [PATCH 02/10] Clean torchcsprng c src Remove unused CPU impl. Format code. 
--- .clang-format | 9 + torchcsprng/{csrc => }/OffsetCalculator.cuh | 13 +- torchcsprng/{csrc => }/THCIntegerDivider.cuh | 27 +- torchcsprng/{csrc/aes.inc => aes.h} | 255 +++++----- torchcsprng/block_cipher.h | 178 +++++++ torchcsprng/csprng.cpp | 292 +++++++++++ torchcsprng/csrc/block_cipher.h | 201 -------- torchcsprng/csrc/cpu/kernels.cpp | 16 - torchcsprng/csrc/cpu/kernels.h | 19 - torchcsprng/csrc/csprng.cpp | 377 --------------- torchcsprng/csrc/cuda/kernels.cu | 16 - torchcsprng/csrc/cuda/kernels.cuh | 19 - torchcsprng/csrc/kernels_body.inc | 437 ----------------- torchcsprng/csrc/kernels_commons.h | 47 -- torchcsprng/csrc/kernels_decls.inc | 56 --- torchcsprng/kernels.cu | 484 +++++++++++++++++++ torchcsprng/kernels.cuh | 84 ++++ torchcsprng/kernels_commons.h | 78 +++ torchcsprng/{csrc => }/macros.h | 0 19 files changed, 1264 insertions(+), 1344 deletions(-) create mode 100644 .clang-format rename torchcsprng/{csrc => }/OffsetCalculator.cuh (91%) rename torchcsprng/{csrc => }/THCIntegerDivider.cuh (86%) rename torchcsprng/{csrc/aes.inc => aes.h} (54%) create mode 100644 torchcsprng/block_cipher.h create mode 100644 torchcsprng/csprng.cpp delete mode 100644 torchcsprng/csrc/block_cipher.h delete mode 100644 torchcsprng/csrc/cpu/kernels.cpp delete mode 100644 torchcsprng/csrc/cpu/kernels.h delete mode 100644 torchcsprng/csrc/csprng.cpp delete mode 100644 torchcsprng/csrc/cuda/kernels.cu delete mode 100644 torchcsprng/csrc/cuda/kernels.cuh delete mode 100644 torchcsprng/csrc/kernels_body.inc delete mode 100644 torchcsprng/csrc/kernels_commons.h delete mode 100644 torchcsprng/csrc/kernels_decls.inc create mode 100644 torchcsprng/kernels.cu create mode 100644 torchcsprng/kernels.cuh create mode 100644 torchcsprng/kernels_commons.h rename torchcsprng/{csrc => }/macros.h (100%) diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..0b97c10 --- /dev/null +++ b/.clang-format @@ -0,0 +1,9 @@ +BasedOnStyle: Google +DerivePointerAlignment: false 
+PointerAlignment: Right +ColumnLimit: 120 +AllowShortFunctionsOnASingleLine: Empty +AlignAfterOpenBracket: DontAlign +ContinuationIndentWidth: 2 +SortIncludes: Never +AlignEscapedNewlines: DontAlign diff --git a/torchcsprng/csrc/OffsetCalculator.cuh b/torchcsprng/OffsetCalculator.cuh similarity index 91% rename from torchcsprng/csrc/OffsetCalculator.cuh rename to torchcsprng/OffsetCalculator.cuh index c38e54a..15fcd0e 100644 --- a/torchcsprng/csrc/OffsetCalculator.cuh +++ b/torchcsprng/OffsetCalculator.cuh @@ -33,7 +33,9 @@ struct OffsetCalculator { // if element_sizes is nullptr, then the strides will be in bytes, otherwise // the strides will be in # of elements. - OffsetCalculator(int dims, const int64_t* sizes, const int64_t* const* strides, const int64_t* element_sizes=nullptr) : dims(dims) { + OffsetCalculator( + int dims, const int64_t *sizes, const int64_t *const *strides, const int64_t *element_sizes = nullptr) + : dims(dims) { TORCH_CHECK(dims <= MAX_DIMS, "tensor has too many (>", MAX_DIMS, ") dims"); for (int i = 0; i < MAX_DIMS; ++i) { if (i < dims) { @@ -43,7 +45,7 @@ struct OffsetCalculator { } for (int arg = 0; arg < NARGS; arg++) { int64_t element_size = (element_sizes == nullptr ? 1LL : element_sizes[arg]); - strides_[i][arg] = i < dims ? strides[arg][i] / element_size : 0; + strides_[i][arg] = i < dims ? 
strides[arg][i] / element_size : 0; } } } @@ -67,7 +69,6 @@ struct OffsetCalculator { for (int arg = 0; arg < NARGS; arg++) { offsets[arg] += divmod.mod * strides_[dim][arg]; } - } return offsets; } @@ -96,10 +97,10 @@ struct TrivialOffsetCalculator { } }; -template -static OffsetCalculator make_offset_calculator(const at::TensorIterator& iter) { +template +static OffsetCalculator make_offset_calculator(const at::TensorIterator &iter) { AT_ASSERT(N <= iter.ntensors()); - std::array strides; + std::array strides; for (int i = 0; i < N; i++) { strides[i] = iter.strides(i).data(); } diff --git a/torchcsprng/csrc/THCIntegerDivider.cuh b/torchcsprng/THCIntegerDivider.cuh similarity index 86% rename from torchcsprng/csrc/THCIntegerDivider.cuh rename to torchcsprng/THCIntegerDivider.cuh index cdf01af..b7dfb6a 100644 --- a/torchcsprng/csrc/THCIntegerDivider.cuh +++ b/torchcsprng/THCIntegerDivider.cuh @@ -62,18 +62,22 @@ template struct DivMod { Value div, mod; - C10_HOST_DEVICE DivMod(Value div, Value mod) : div(div), mod(mod) { } + C10_HOST_DEVICE DivMod(Value div, Value mod) : div(div), mod(mod) {} }; // Base case: we only have an implementation for uint32_t for now. For // everything else, we use plain division. template struct IntDivider { - IntDivider() { } // Dummy constructor for arrays. - IntDivider(Value d) : divisor(d) { } + IntDivider() {} // Dummy constructor for arrays. 
+ IntDivider(Value d) : divisor(d) {} - C10_HOST_DEVICE inline Value div(Value n) const { return n / divisor; } - C10_HOST_DEVICE inline Value mod(Value n) const { return n % divisor; } + C10_HOST_DEVICE inline Value div(Value n) const { + return n / divisor; + } + C10_HOST_DEVICE inline Value mod(Value n) const { + return n % divisor; + } C10_HOST_DEVICE inline DivMod divmod(Value n) const { return DivMod(n / divisor, n % divisor); } @@ -86,13 +90,14 @@ template <> struct IntDivider { static_assert(sizeof(unsigned int) == 4, "Assumes 32-bit unsigned int."); - IntDivider() { } // Dummy constructor for arrays. + IntDivider() {} // Dummy constructor for arrays. IntDivider(unsigned int d) : divisor(d) { assert(divisor >= 1 && divisor <= INT32_MAX); // TODO: gcc/clang has __builtin_clz() but it's not portable. - for (shift = 0; shift < 32; shift++) if ((1U << shift) >= divisor) break; + for (shift = 0; shift < 32; shift++) + if ((1U << shift) >= divisor) break; uint64_t one = 1; uint64_t magic = ((one << 32) * ((one << shift) - divisor)) / divisor + 1; @@ -108,7 +113,7 @@ struct IntDivider { return (t + n) >> shift; #else // Using uint64_t so that the addition does not overflow. - uint64_t t = ((uint64_t) n * m1) >> 32; + uint64_t t = ((uint64_t)n * m1) >> 32; return (t + n) >> shift; #endif } @@ -123,8 +128,8 @@ struct IntDivider { } unsigned int divisor; // d above. - unsigned int m1; // Magic number: m' above. - unsigned int shift; // Shift amounts. + unsigned int m1; // Magic number: m' above. + unsigned int shift; // Shift amounts. 
}; -#endif // THC_INTEGER_DIVIDER_INC +#endif // THC_INTEGER_DIVIDER_INC diff --git a/torchcsprng/csrc/aes.inc b/torchcsprng/aes.h similarity index 54% rename from torchcsprng/csrc/aes.inc rename to torchcsprng/aes.h index db29996..32041f3 100644 --- a/torchcsprng/csrc/aes.inc +++ b/torchcsprng/aes.h @@ -10,7 +10,7 @@ namespace aes { // This AES implementation is based on // https://github.com/kokke/tiny-AES-c/blob/master/aes.c // authored by kokke and et al. and distributed under public domain license. -// +// // This is free and unencumbered software released into the public domain. // // Anyone is free to copy, modify, publish, use, compile, sell, or @@ -45,14 +45,14 @@ namespace aes { #define Nb 4 #if defined(AES256) && (AES256 == 1) - #define Nk 8 - #define Nr 14 +#define Nk 8 +#define Nr 14 #elif defined(AES192) && (AES192 == 1) - #define Nk 6 - #define Nr 12 +#define Nk 6 +#define Nr 12 #else - #define Nk 4 // The number of 32 bit words in a key. - #define Nr 10 // The number of rounds in AES Cipher. +#define Nk 4 // The number of 32 bit words in a key. +#define Nr 10 // The number of rounds in AES Cipher. #endif constexpr size_t block_t_size = 16; @@ -60,62 +60,55 @@ constexpr size_t block_t_size = 16; typedef uint8_t state_t[4][4]; // The lookup-tables are marked const so they can be placed in read-only storage instead of RAM -// The numbers below can be computed dynamically trading ROM for RAM - +// The numbers below can be computed dynamically trading ROM for RAM - // This can be useful in (embedded) bootloader applications, where ROM is often limited. 
TORCH_CSPRNG_CONSTANT const uint8_t sbox[256] = { - //0 1 2 3 4 5 6 7 8 9 A B C D E F - 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, - 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, - 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, - 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, - 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, - 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, - 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, - 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, - 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, - 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, - 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, - 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, - 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, - 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, - 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, - 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }; - -TORCH_CSPRNG_CONSTANT const uint8_t rsbox[256] = { - 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 
0x4e, - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, - 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d }; - -// The round constant word array, Rcon[i], contains the values given by + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, + 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, + 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, + 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, + 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, + 0xcf, 0xd0, 0xef, 0xaa, 
0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, + 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, 0x5f, + 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, + 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, + 0xae, 0x08, 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, + 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, + 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, + 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}; + +TORCH_CSPRNG_CONSTANT const uint8_t rsbox[256] = {0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, + 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, + 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, 0x08, + 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, + 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, + 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, + 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, + 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 
0x1a, + 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, + 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, + 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, + 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, + 0x61, 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d}; + +// The round constant word array, Rcon[i], contains the values given by // x to the power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8) -TORCH_CSPRNG_CONSTANT const uint8_t Rcon[11] = { - 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 }; +TORCH_CSPRNG_CONSTANT const uint8_t Rcon[11] = {0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36}; #define getSBoxValue(num) (sbox[(num)]) #define getSBoxInvert(num) (rsbox[(num)]) -// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. -TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key){ +// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. +TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t *RoundKey, const uint8_t *Key) { unsigned int i, j, k; - uint8_t tempa[4]; // Used for the column/row operations - + uint8_t tempa[4]; // Used for the column/row operations + // The first round key is the key itself. - for (i = 0; i < Nk; ++i) - { + for (i = 0; i < Nk; ++i) { RoundKey[(i * 4) + 0] = Key[(i * 4) + 0]; RoundKey[(i * 4) + 1] = Key[(i * 4) + 1]; RoundKey[(i * 4) + 2] = Key[(i * 4) + 2]; @@ -123,19 +116,16 @@ TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key } // All other round keys are found from the previous round keys. 
- for (i = Nk; i < Nb * (Nr + 1); ++i) - { + for (i = Nk; i < Nb * (Nr + 1); ++i) { { k = (i - 1) * 4; - tempa[0]=RoundKey[k + 0]; - tempa[1]=RoundKey[k + 1]; - tempa[2]=RoundKey[k + 2]; - tempa[3]=RoundKey[k + 3]; - + tempa[0] = RoundKey[k + 0]; + tempa[1] = RoundKey[k + 1]; + tempa[2] = RoundKey[k + 2]; + tempa[3] = RoundKey[k + 3]; } - if (i % Nk == 0) - { + if (i % Nk == 0) { // This function shifts the 4 bytes in a word to the left once. // [a0,a1,a2,a3] becomes [a1,a2,a3,a0] @@ -148,7 +138,7 @@ TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key tempa[3] = u8tmp; } - // SubWord() is a function that takes a four-byte input word and + // SubWord() is a function that takes a four-byte input word and // applies the S-box to each of the four bytes to produce an output word. // Function Subword() @@ -159,11 +149,10 @@ TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key tempa[3] = getSBoxValue(tempa[3]); } - tempa[0] = tempa[0] ^ Rcon[i/Nk]; + tempa[0] = tempa[0] ^ Rcon[i / Nk]; } #if defined(AES256) && (AES256 == 1) - if (i % Nk == 4) - { + if (i % Nk == 4) { // Function Subword() { tempa[0] = getSBoxValue(tempa[0]); @@ -173,7 +162,8 @@ TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key } } #endif - j = i * 4; k=(i - Nk) * 4; + j = i * 4; + k = (i - Nk) * 4; RoundKey[j + 0] = RoundKey[k + 0] ^ tempa[0]; RoundKey[j + 1] = RoundKey[k + 1] ^ tempa[1]; RoundKey[j + 2] = RoundKey[k + 2] ^ tempa[2]; @@ -183,13 +173,10 @@ TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key // This function adds the round key to state. // The round key is added to the state by an XOR function. 
-TORCH_CSPRNG_HOST_DEVICE void AddRoundKey(uint8_t round, state_t* state, const uint8_t* RoundKey) -{ - uint8_t i,j; - for (i = 0; i < 4; ++i) - { - for (j = 0; j < 4; ++j) - { +TORCH_CSPRNG_HOST_DEVICE void AddRoundKey(uint8_t round, state_t *state, const uint8_t *RoundKey) { + uint8_t i, j; + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { (*state)[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j]; } } @@ -197,13 +184,10 @@ TORCH_CSPRNG_HOST_DEVICE void AddRoundKey(uint8_t round, state_t* state, const u // The SubBytes Function Substitutes the values in the // state matrix with values in an S-box. -TORCH_CSPRNG_HOST_DEVICE void SubBytes(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void SubBytes(state_t *state) { uint8_t i, j; - for (i = 0; i < 4; ++i) - { - for (j = 0; j < 4; ++j) - { + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { (*state)[j][i] = getSBoxValue((*state)[j][i]); } } @@ -212,73 +196,72 @@ TORCH_CSPRNG_HOST_DEVICE void SubBytes(state_t* state) // The ShiftRows() function shifts the rows in the state to the left. // Each row is shifted with different offset. // Offset = Row number. So the first row is not shifted. 
-TORCH_CSPRNG_HOST_DEVICE void ShiftRows(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void ShiftRows(state_t *state) { uint8_t temp; - // Rotate first row 1 columns to left - temp = (*state)[0][1]; + // Rotate first row 1 columns to left + temp = (*state)[0][1]; (*state)[0][1] = (*state)[1][1]; (*state)[1][1] = (*state)[2][1]; (*state)[2][1] = (*state)[3][1]; (*state)[3][1] = temp; - // Rotate second row 2 columns to left - temp = (*state)[0][2]; + // Rotate second row 2 columns to left + temp = (*state)[0][2]; (*state)[0][2] = (*state)[2][2]; (*state)[2][2] = temp; - temp = (*state)[1][2]; + temp = (*state)[1][2]; (*state)[1][2] = (*state)[3][2]; (*state)[3][2] = temp; // Rotate third row 3 columns to left - temp = (*state)[0][3]; + temp = (*state)[0][3]; (*state)[0][3] = (*state)[3][3]; (*state)[3][3] = (*state)[2][3]; (*state)[2][3] = (*state)[1][3]; (*state)[1][3] = temp; } -TORCH_CSPRNG_HOST_DEVICE uint8_t xtime(uint8_t x) -{ - return ((x<<1) ^ (((x>>7) & 1) * 0x1b)); +TORCH_CSPRNG_HOST_DEVICE uint8_t xtime(uint8_t x) { + return ((x << 1) ^ (((x >> 7) & 1) * 0x1b)); } // MixColumns function mixes the columns of the state matrix -TORCH_CSPRNG_HOST_DEVICE void MixColumns(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void MixColumns(state_t *state) { uint8_t i; uint8_t Tmp, Tm, t; - for (i = 0; i < 4; ++i) - { - t = (*state)[i][0]; - Tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3] ; - Tm = (*state)[i][0] ^ (*state)[i][1] ; Tm = xtime(Tm); (*state)[i][0] ^= Tm ^ Tmp ; - Tm = (*state)[i][1] ^ (*state)[i][2] ; Tm = xtime(Tm); (*state)[i][1] ^= Tm ^ Tmp ; - Tm = (*state)[i][2] ^ (*state)[i][3] ; Tm = xtime(Tm); (*state)[i][2] ^= Tm ^ Tmp ; - Tm = (*state)[i][3] ^ t ; Tm = xtime(Tm); (*state)[i][3] ^= Tm ^ Tmp ; + for (i = 0; i < 4; ++i) { + t = (*state)[i][0]; + Tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3]; + Tm = (*state)[i][0] ^ (*state)[i][1]; + Tm = xtime(Tm); + (*state)[i][0] ^= Tm ^ Tmp; + Tm = (*state)[i][1] ^ 
(*state)[i][2]; + Tm = xtime(Tm); + (*state)[i][1] ^= Tm ^ Tmp; + Tm = (*state)[i][2] ^ (*state)[i][3]; + Tm = xtime(Tm); + (*state)[i][2] ^= Tm ^ Tmp; + Tm = (*state)[i][3] ^ t; + Tm = xtime(Tm); + (*state)[i][3] ^= Tm ^ Tmp; } } -TORCH_CSPRNG_HOST_DEVICE uint8_t Multiply(uint8_t x, uint8_t y) -{ - return (((y & 1) * x) ^ - ((y>>1 & 1) * xtime(x)) ^ - ((y>>2 & 1) * xtime(xtime(x))) ^ - ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^ - ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))); /* this last call to xtime() can be omitted */ +TORCH_CSPRNG_HOST_DEVICE uint8_t Multiply(uint8_t x, uint8_t y) { + return (((y & 1) * x) ^ ((y >> 1 & 1) * xtime(x)) ^ ((y >> 2 & 1) * xtime(xtime(x))) ^ + ((y >> 3 & 1) * xtime(xtime(xtime(x)))) ^ + ((y >> 4 & 1) * xtime(xtime(xtime(xtime(x)))))); /* this last call to xtime() can be omitted */ } // MixColumns function mixes the columns of the state matrix. // The method used to multiply may be difficult to understand for the inexperienced. // Please use the references to gain more information. -TORCH_CSPRNG_HOST_DEVICE void InvMixColumns(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void InvMixColumns(state_t *state) { int i; uint8_t a, b, c, d; - for (i = 0; i < 4; ++i) - { + for (i = 0; i < 4; ++i) { a = (*state)[i][0]; b = (*state)[i][1]; c = (*state)[i][2]; @@ -293,20 +276,16 @@ TORCH_CSPRNG_HOST_DEVICE void InvMixColumns(state_t* state) // The SubBytes Function Substitutes the values in the // state matrix with values in an S-box. 
-TORCH_CSPRNG_HOST_DEVICE void InvSubBytes(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void InvSubBytes(state_t *state) { uint8_t i, j; - for (i = 0; i < 4; ++i) - { - for (j = 0; j < 4; ++j) - { + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { (*state)[j][i] = getSBoxInvert((*state)[j][i]); } } } -TORCH_CSPRNG_HOST_DEVICE void InvShiftRows(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void InvShiftRows(state_t *state) { uint8_t temp; // Rotate first row 1 columns to right @@ -333,56 +312,54 @@ TORCH_CSPRNG_HOST_DEVICE void InvShiftRows(state_t* state) (*state)[3][3] = temp; } -TORCH_CSPRNG_HOST_DEVICE void encrypt(uint8_t* state, const uint8_t* key) { +TORCH_CSPRNG_HOST_DEVICE void encrypt(uint8_t *state, const uint8_t *key) { uint8_t RoundKey[176]; - KeyExpansion(RoundKey, key); + KeyExpansion(RoundKey, key); uint8_t round = 0; // Add the First round key to the state before starting the rounds. - AddRoundKey(0, (state_t*)state, RoundKey); + AddRoundKey(0, (state_t *)state, RoundKey); // There will be Nr rounds. // The first Nr-1 rounds are identical. // These Nr rounds are executed in the loop below. // Last one without MixColumns() - for (round = 1; ; ++round) - { - SubBytes((state_t*)state); - ShiftRows((state_t*)state); + for (round = 1;; ++round) { + SubBytes((state_t *)state); + ShiftRows((state_t *)state); if (round == Nr) { break; } - MixColumns((state_t*)state); - AddRoundKey(round, (state_t*)state, RoundKey); + MixColumns((state_t *)state); + AddRoundKey(round, (state_t *)state, RoundKey); } // Add round key to last round - AddRoundKey(Nr, (state_t*)state, RoundKey); + AddRoundKey(Nr, (state_t *)state, RoundKey); } -TORCH_CSPRNG_HOST_DEVICE void decrypt(uint8_t* state, const uint8_t* key) { +TORCH_CSPRNG_HOST_DEVICE void decrypt(uint8_t *state, const uint8_t *key) { uint8_t RoundKey[176]; KeyExpansion(RoundKey, key); uint8_t round = 0; // Add the First round key to the state before starting the rounds. 
- AddRoundKey(Nr, (state_t*)state, RoundKey); + AddRoundKey(Nr, (state_t *)state, RoundKey); // There will be Nr rounds. // The first Nr-1 rounds are identical. // These Nr rounds are executed in the loop below. // Last one without InvMixColumn() - for (round = (Nr - 1); ; --round) - { - InvShiftRows((state_t*)state); - InvSubBytes((state_t*)state); - AddRoundKey(round, (state_t*)state, RoundKey); + for (round = (Nr - 1);; --round) { + InvShiftRows((state_t *)state); + InvSubBytes((state_t *)state); + AddRoundKey(round, (state_t *)state, RoundKey); if (round == 0) { break; } - InvMixColumns((state_t*)state); + InvMixColumns((state_t *)state); } } -} +} // namespace aes diff --git a/torchcsprng/block_cipher.h b/torchcsprng/block_cipher.h new file mode 100644 index 0000000..aeae133 --- /dev/null +++ b/torchcsprng/block_cipher.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include "macros.h" +#include +#include +#include "OffsetCalculator.cuh" +#include +#include +#include + +#if defined(__CUDACC__) || defined(__HIPCC__) +#include +#include +#endif + +#if defined(__CUDACC__) || defined(__HIPCC__) +#define UNROLL_IF_CUDA #pragma unroll +#else +#define UNROLL_IF_CUDA +#endif + +namespace torch { +namespace csprng { + +template +TORCH_CSPRNG_HOST_DEVICE static void copy_input_to_block(int64_t idx, uint8_t *block, int block_size, void *input_ptr, + int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc) { + for (auto i = 0; i < block_size / input_type_size; ++i) { + const auto linear_index = idx * (block_size / input_type_size) + i; + if (linear_index < input_numel) { + std::memcpy(block + i * input_type_size, + &(reinterpret_cast(input_ptr)[input_index_calc(linear_index)]), input_type_size); + } + } +} + +template +TORCH_CSPRNG_HOST_DEVICE static void copy_block_to_output(int64_t idx, uint8_t *block, int output_elem_per_block, + void *output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc) { + for (auto i = 0; i < output_elem_per_block; ++i) { + const auto linear_index = idx * output_elem_per_block + i; + if (linear_index < output_numel) { + std::memcpy(&(reinterpret_cast(output_ptr)[output_index_calc(linear_index)]), + block + i * output_type_size, output_type_size); + } + } +} + +template +TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper(int64_t idx, cipher_t cipher, int output_elem_per_block, + void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, + int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { + uint8_t block[block_size]; + std::memset(&block, 0, block_size); // is it ok to use zeros as padding? 
+ if (input_ptr != nullptr) { + copy_input_to_block(idx, block, block_size, input_ptr, input_numel, input_type_size, input_index_calc); + } + cipher(idx, block); + transform(block); + copy_block_to_output( + idx, block, output_elem_per_block, output_ptr, output_numel, output_type_size, output_index_calc); +} + +#if defined(__CUDACC__) || defined(__HIPCC__) +template +__global__ static void block_cipher_kernel_cuda(cipher_t cipher, int output_elem_per_block, void *input_ptr, + int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, int64_t output_numel, + int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { + const auto idx = blockIdx.x * blockDim.x + threadIdx.x; + block_cipher_kernel_helper(idx, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, + input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform); +} +#endif + +template +static void block_cipher_kernel_cpu_serial(int64_t begin, int64_t end, cipher_t cipher, int output_elem_per_block, + void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, + int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { + for (auto idx = begin; idx < end; ++idx) { + block_cipher_kernel_helper(idx, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, + input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform); + } +} + +template +static void block_cipher_kernel_cpu(int64_t total, cipher_t cipher, int output_elem_per_block, void *input_ptr, + int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, int64_t output_numel, + int output_type_size, output_index_calc_t output_index_calc, transform_t transform_func) { + if (total < at::internal::GRAIN_SIZE || at::get_num_threads() == 1) { + 
block_cipher_kernel_cpu_serial(0, total, cipher, output_elem_per_block, input_ptr, input_numel, + input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform_func); + } else { + at::parallel_for(0, total, at::internal::GRAIN_SIZE, [&](int64_t begin, int64_t end) { + block_cipher_kernel_cpu_serial(begin, end, cipher, output_elem_per_block, input_ptr, input_numel, + input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, + transform_func); + }); + } +} + +template +void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, + void *output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, + at::Device device, cipher_t cipher, int output_elem_per_block, transform_t transform_func) { + if (output_ptr == nullptr || output_numel == 0) { + return; + } + + if (device.type() == at::kCPU) { + const auto total = (output_numel + output_elem_per_block - 1) / output_elem_per_block; + block_cipher_kernel_cpu(total, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, + input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform_func); + } else if (device.type() == at::kCUDA) { +#if defined(__CUDACC__) || defined(__HIPCC__) + const auto threads = 256; + const auto grid = (output_numel + (threads * output_elem_per_block) - 1) / (threads * output_elem_per_block); + auto stream = at::cuda::getCurrentCUDAStream(); + block_cipher_kernel_cuda<<>>(cipher, output_elem_per_block, input_ptr, + input_numel, input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, + transform_func); + AT_CUDA_CHECK(cudaGetLastError()); +#else + TORCH_CHECK(false, "torchcsprng was compiled without CUDA support"); +#endif + } else { + TORCH_CHECK(false, "block_cipher supports only CPU and CUDA devices"); + } +} + +template +void 
block_cipher(at::Tensor input, at::Tensor output, cipher_t cipher) { + const auto input_ptr = input.data_ptr(); + const auto input_numel = input.numel(); + + // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero + if (input_ptr == nullptr || input_numel == 0) { + return; + } + + const auto input_type_size = input.element_size(); + const auto input_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(input)); + const auto input_index_calc = [input_offset_calc] TORCH_CSPRNG_HOST_DEVICE( + uint32_t li) -> uint32_t { return input_offset_calc.get(li)[0]; }; + + const auto output_ptr = output.data_ptr(); + const auto output_numel = output.numel(); + + // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero + if (output_ptr == nullptr || output_numel == 0) { + return; + } + + const auto output_type_size = output.element_size(); + const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); + const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE( + uint32_t li) -> uint32_t { return output_offset_calc.get(li)[0]; }; + + const auto device = output.device(); + + torch::csprng::block_cipher(input_ptr, input_numel, input_type_size, input_index_calc, output_ptr, + output_numel, output_type_size, output_index_calc, device, cipher, block_size / output_type_size, + [] TORCH_CSPRNG_HOST_DEVICE(uint8_t * x) {}); +} + +} // namespace csprng +} // namespace torch diff --git a/torchcsprng/csprng.cpp b/torchcsprng/csprng.cpp new file mode 100644 index 0000000..26c527d --- /dev/null +++ b/torchcsprng/csprng.cpp @@ -0,0 +1,292 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include + +#include +#include +#include + +#include "kernels_commons.h" +#include "kernels.cuh" + +using namespace at; +using namespace torch::csprng; + +static const auto GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE = "generator does not support tensor device type"; +static const auto TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED = "tensor device type is not supported"; + +// ==================================================== Random ======================================================== + +Tensor &random_(Tensor &self, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::random_(self, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor &random_from_to(Tensor &self, int64_t from, optional to, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::random_from_to(self, from, to, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor &random_to(Tensor &self, int64_t to, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::random_to(self, to, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ==================================================== Uniform ======================================================= + +Tensor &uniform_(Tensor &self, double from, double to, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::uniform_(self, from, to, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ==================================================== Normal ======================================================== + +Tensor &normal_(Tensor &self, double mean, double std, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_(self, 
mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor &normal_Tensor_float_out(const Tensor &mean, double std, c10::optional gen, Tensor &output) { + if (output.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_float_out(output, mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor &normal_float_Tensor_out(double mean, const Tensor &std, c10::optional gen, Tensor &output) { + if (output.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_float_Tensor_out(output, mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor &normal_Tensor_Tensor_out(const Tensor &mean, const Tensor &std, c10::optional gen, Tensor &output) { + if (output.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_Tensor_out(output, mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor normal_Tensor_float(const Tensor &mean, double std, c10::optional gen) { + if (mean.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_float(mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor normal_float_Tensor(double mean, const Tensor &std, c10::optional gen) { + if (std.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_float_Tensor(mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor normal_Tensor_Tensor(const Tensor &mean, const Tensor &std, c10::optional gen) { + if (mean.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_Tensor(mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// 
==================================================== Cauchy ======================================================== + +Tensor &cauchy_(Tensor &self, double median, double sigma, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::cauchy_(self, median, sigma, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ================================================== LogNormal ======================================================= + +Tensor &log_normal_(Tensor &self, double mean, double std, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::log_normal_(self, mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ================================================== Geometric ======================================================= + +Tensor &geometric_(Tensor &self, double p, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::geometric_(self, p, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ================================================== Exponential ===================================================== + +Tensor &exponential_(Tensor &self, double lambda, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::exponential_(self, lambda, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// =============================================== Random permutation ================================================= + +// randperm implementation was copied from PyTorch to unblock CSPRNG users, but ultimately CSPRNG must reuse +// refactored randperm from PyTorch, see https://github.com/pytorch/pytorch/issues/43816 + +namespace { + +inline void check_supported_max_int_with_precision(int64_t n, 
const Tensor &tensor) { + TORCH_CHECK(at::scalar_tensor(n, tensor.options()).defined(), "n is too large for result tensor type: '", + tensor.toString(), "'"); + + // Ensure sufficient precision for floating point representation. + switch (tensor.scalar_type()) { + case at::ScalarType::Half: + TORCH_CHECK(n <= (int64_t(1) << 11) + 1, "n cannot be greater than 2049 for Half type."); + break; + case at::ScalarType::Float: + TORCH_CHECK(n <= (int64_t(1) << 24) + 1, "n cannot be greater than 2^24+1 for Float type."); + break; + case at::ScalarType::Double: // Unlikely to happen, but doesn't hurt to check + TORCH_CHECK(n <= (int64_t(1) << 53) + 1, "n cannot be greater than 2^53+1 for Double type."); + break; + default: + break; + } +} + +template +void randperm(Tensor &result, int64_t n, c10::optional generator) { + auto gen = at::check_generator(generator); + scalar_t *r__data = result.data_ptr(); + + result.resize_({n}); + int64_t r__stride_0 = result.stride(0); + + at::parallel_for(0, n, internal::GRAIN_SIZE, [&r__data, &r__stride_0](int64_t p_begin, int64_t p_end) { + for (int64_t i = p_begin; i < p_end; i++) r__data[i * r__stride_0] = static_cast(i); + }); + + for (int64_t i = 0; i < n - 1; i++) { + int64_t z = gen->random() % (n - i); + scalar_t sav = r__data[i * r__stride_0]; + r__data[i * r__stride_0] = r__data[(z + i) * r__stride_0]; + r__data[(z + i) * r__stride_0] = sav; + } +} +} // namespace + +Tensor &randperm_generator_out(int64_t n, c10::optional generator, Tensor &result) { + TORCH_CHECK(n >= 0, "n must be non-negative, got", n); + check_supported_max_int_with_precision(n, result); + if (result.device().type() == at::kCUDA) { + auto result_cpu = at::empty({n}, result.options().device(kCPU)); + randperm_generator_out(n, generator, result_cpu); + result.resize_({n}); + return result.copy_(result_cpu); + } + result.resize_({n}); + // See Note [Acquire lock when using random generators] + std::lock_guard lock(generator->mutex()); + 
AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, result.scalar_type(), "randperm", + [&]() -> void { randperm(result, n, generator); }); + return result; +} + +// ================================================Encrypt/Decrypt===================================================== + +Tensor encrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { + if (input.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::encrypt(input, output, key, cipher, mode); + } else { + TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); + } +} + +Tensor decrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { + if (input.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::decrypt(input, output, key, cipher, mode); + } else { + TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); + } +} + +// ==================================================================================================================== + +Generator create_random_device_generator(c10::optional token = c10::nullopt) { + if (token.has_value()) { + return make_generator(*token); + } else { + return make_generator(true); + } +} + +Generator create_mt19937_generator(c10::optional seed = c10::nullopt) { + if (seed.has_value()) { + return make_generator(*seed); + } else { + return make_generator(false); + } +} + +bool supports_cuda() { +#ifdef WITH_CUDA + return true; +#else + return false; +#endif +} + +TORCH_LIBRARY_IMPL(aten, CustomRNGKeyId, m) { + // Random + m.impl("random_.from", random_from_to); + m.impl("random_.to", random_to); + m.impl("random_", random_); + // Uniform + m.impl("uniform_", uniform_); + // Normal + m.impl("normal_", normal_); + m.impl("normal.Tensor_float_out", normal_Tensor_float_out); + m.impl("normal.float_Tensor_out", normal_float_Tensor_out); + m.impl("normal.Tensor_Tensor_out", normal_Tensor_Tensor_out); + m.impl("normal.Tensor_float", 
normal_Tensor_float); + m.impl("normal.float_Tensor", normal_float_Tensor); + m.impl("normal.Tensor_Tensor", normal_Tensor_Tensor); + // Cauchy + m.impl("cauchy_", cauchy_); + // LogNormal + m.impl("log_normal_", log_normal_); + // Geometric + m.impl("geometric_", geometric_); + // Exponential + m.impl("exponential_", exponential_); + // Random permutation + m.impl("randperm.generator_out", randperm_generator_out); +} diff --git a/torchcsprng/csrc/block_cipher.h b/torchcsprng/csrc/block_cipher.h deleted file mode 100644 index 5f1e2cf..0000000 --- a/torchcsprng/csrc/block_cipher.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include "macros.h" -#include -#include -#include "OffsetCalculator.cuh" -#include -#include -#include - -#if defined(__CUDACC__) || defined(__HIPCC__) -#include -#include -#endif - -#if defined(__CUDACC__) || defined(__HIPCC__) -#define UNROLL_IF_CUDA #pragma unroll -#else -#define UNROLL_IF_CUDA -#endif - -namespace torch { -namespace csprng { - -template -TORCH_CSPRNG_HOST_DEVICE static void copy_input_to_block(int64_t idx, uint8_t* block, int block_size, - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc) { - for (auto i = 0; i < block_size / input_type_size; ++i) { - const auto linear_index = idx * (block_size / input_type_size) + i; - if (linear_index < input_numel) { - std::memcpy( - block + i * input_type_size, - &(reinterpret_cast(input_ptr)[input_index_calc(linear_index)]), - input_type_size - ); - } - } -} - -template -TORCH_CSPRNG_HOST_DEVICE static void copy_block_to_output(int64_t idx, uint8_t* block, int output_elem_per_block, - void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc) { - for (auto i = 0; i < 
output_elem_per_block; ++i) { - const auto linear_index = idx * output_elem_per_block + i; - if (linear_index < output_numel) { - std::memcpy( - &(reinterpret_cast(output_ptr)[output_index_calc(linear_index)]), - block + i * output_type_size, - output_type_size - ); - } - } -} - -template -TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper( - int64_t idx, cipher_t cipher, int output_elem_per_block, - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, - transform_t transform) { - uint8_t block[block_size]; - std::memset(&block, 0, block_size); // is it ok to use zeros as padding? - if (input_ptr != nullptr) { - copy_input_to_block(idx, block, block_size, input_ptr, input_numel, input_type_size, input_index_calc); - } - cipher(idx, block); - transform(block); - copy_block_to_output(idx, block, output_elem_per_block, output_ptr, output_numel, output_type_size, output_index_calc); -} - -#if defined(__CUDACC__) || defined(__HIPCC__) -template -__global__ static void block_cipher_kernel_cuda(cipher_t cipher, int output_elem_per_block, - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, - transform_t transform) { - const auto idx = blockIdx.x * blockDim.x + threadIdx.x; - block_cipher_kernel_helper(idx, cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - transform); -} -#endif - -template -static void block_cipher_kernel_cpu_serial(int64_t begin, int64_t end, cipher_t cipher, int output_elem_per_block, - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void* output_ptr, int64_t output_numel, int output_type_size, 
output_index_calc_t output_index_calc, - transform_t transform) { - for (auto idx = begin; idx < end; ++idx) { - block_cipher_kernel_helper(idx, cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - transform); - } -} - -template -static void block_cipher_kernel_cpu(int64_t total, cipher_t cipher, int output_elem_per_block, - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, - transform_t transform_func) { - if (total < at::internal::GRAIN_SIZE || at::get_num_threads() == 1) { - block_cipher_kernel_cpu_serial(0, total, cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - transform_func); - } else { - at::parallel_for(0, total, at::internal::GRAIN_SIZE, [&](int64_t begin, int64_t end) { - block_cipher_kernel_cpu_serial(begin, end, cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - transform_func); - }); - } -} - -template -void block_cipher( - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, - at::Device device, cipher_t cipher, int output_elem_per_block, transform_t transform_func) { - if (output_ptr == nullptr || output_numel == 0) { - return; - } - - if (device.type() == at::kCPU) { - const auto total = (output_numel + output_elem_per_block - 1) / output_elem_per_block; - block_cipher_kernel_cpu(total, - cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, 
output_index_calc, - transform_func - ); - } else if (device.type() == at::kCUDA) { -#if defined(__CUDACC__) || defined(__HIPCC__) - const auto threads = 256; - const auto grid = (output_numel + (threads * output_elem_per_block) - 1) / (threads * output_elem_per_block); - auto stream = at::cuda::getCurrentCUDAStream(); - block_cipher_kernel_cuda<<>>( - cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - transform_func - ); - AT_CUDA_CHECK(cudaGetLastError()); -#else - TORCH_CHECK(false, "torchcsprng was compiled without CUDA support"); -#endif - } else { - TORCH_CHECK(false, "block_cipher supports only CPU and CUDA devices"); - } -} - -template -void block_cipher(at::Tensor input, at::Tensor output, cipher_t cipher) { - const auto input_ptr = input.data_ptr(); - const auto input_numel = input.numel(); - - // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero - if (input_ptr == nullptr || input_numel == 0) { - return; - } - - const auto input_type_size = input.element_size(); - const auto input_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(input)); - const auto input_index_calc = [input_offset_calc] TORCH_CSPRNG_HOST_DEVICE (uint32_t li) -> uint32_t { - return input_offset_calc.get(li)[0]; - }; - - const auto output_ptr = output.data_ptr(); - const auto output_numel = output.numel(); - - // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero - if (output_ptr == nullptr || output_numel == 0) { - return; - } - - const auto output_type_size = output.element_size(); - const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); - const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE (uint32_t li) -> uint32_t { - return output_offset_calc.get(li)[0]; - }; - - const auto device = output.device(); - - torch::csprng::block_cipher( - 
input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - device, cipher, block_size / output_type_size, - [] TORCH_CSPRNG_HOST_DEVICE (uint8_t* x) {}); -} - -}} diff --git a/torchcsprng/csrc/cpu/kernels.cpp b/torchcsprng/csrc/cpu/kernels.cpp deleted file mode 100644 index 395810a..0000000 --- a/torchcsprng/csrc/cpu/kernels.cpp +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../kernels_commons.h" - -namespace torch { -namespace csprng { -namespace cpu { - -#include "../kernels_body.inc" - -}}} diff --git a/torchcsprng/csrc/cpu/kernels.h b/torchcsprng/csrc/cpu/kernels.h deleted file mode 100644 index b907408..0000000 --- a/torchcsprng/csrc/cpu/kernels.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include - -namespace torch { -namespace csprng { -namespace cpu { - -#include "../kernels_decls.inc" - -}}} diff --git a/torchcsprng/csrc/csprng.cpp b/torchcsprng/csrc/csprng.cpp deleted file mode 100644 index 8ae61eb..0000000 --- a/torchcsprng/csrc/csprng.cpp +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include -#include - -#include -#include -#include - -#include "kernels_commons.h" -#include "cpu/kernels.h" -#ifdef WITH_CUDA -#include "cuda/kernels.cuh" -#endif - -using namespace at; -using namespace torch::csprng; - -static const auto GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE = "generator does not support tensor device type"; -static const auto TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED = "tensor device type is not supported"; - -// ==================================================== Random ======================================================== - -Tensor& random_(Tensor& self, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::random_(self, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_(self, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor& random_from_to(Tensor& self, int64_t from, optional to, - c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::random_from_to(self, from, to, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_from_to(self, from, to, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor& random_to(Tensor& self, int64_t to, - c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::random_to(self, to, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_to(self, to, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Uniform ======================================================= - -Tensor& uniform_(Tensor& self, double from, double to, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return 
cpu::uniform_(self, from, to, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::uniform_(self, from, to, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Normal ======================================================== - -Tensor& normal_(Tensor& self, double mean, double std, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::normal_(self, mean, std, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_(self, mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor& normal_Tensor_float_out(const Tensor& mean, double std, c10::optional gen, Tensor& output) { - if (output.device().type() == DeviceType::CPU) { - return cpu::normal_Tensor_float_out(output, mean, std, gen); -#ifdef WITH_CUDA - } else if (output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_float_out(output, mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor& normal_float_Tensor_out(double mean, const Tensor& std, c10::optional gen, Tensor& output) { - if (output.device().type() == DeviceType::CPU) { - return cpu::normal_float_Tensor_out(output, mean, std, gen); -#ifdef WITH_CUDA - } else if (output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_float_Tensor_out(output, mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor& normal_Tensor_Tensor_out(const Tensor& mean, const Tensor& std, c10::optional gen, Tensor& output) { - if (output.device().type() == DeviceType::CPU) { - return cpu::normal_Tensor_Tensor_out(output, mean, std, gen); -#ifdef WITH_CUDA - } else if 
(output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_Tensor_out(output, mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_Tensor_float(const Tensor& mean, double std, c10::optional gen) { - if (mean.device().type() == DeviceType::CPU) { - return cpu::normal_Tensor_float(mean, std, gen); -#ifdef WITH_CUDA - } else if (mean.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_float(mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_float_Tensor(double mean, const Tensor& std, c10::optional gen) { - if (std.device().type() == DeviceType::CPU) { - return cpu::normal_float_Tensor(mean, std, gen); -#ifdef WITH_CUDA - } else if (std.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_float_Tensor(mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_Tensor_Tensor(const Tensor& mean, const Tensor& std, c10::optional gen) { - if (mean.device().type() == DeviceType::CPU) { - return cpu::normal_Tensor_Tensor(mean, std, gen); -#ifdef WITH_CUDA - } else if (mean.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_Tensor(mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Cauchy ======================================================== - -Tensor& cauchy_(Tensor& self, double median, double sigma, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::cauchy_(self, median, sigma, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::cauchy_(self, median, sigma, gen); -#endif - } else { - TORCH_CHECK(false, 
GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== LogNormal ======================================================= - -Tensor& log_normal_(Tensor& self, double mean, double std, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::log_normal_(self, mean, std, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::log_normal_(self, mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== Geometric ======================================================= - -Tensor& geometric_(Tensor& self, double p, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::geometric_(self, p, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::geometric_(self, p, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== Exponential ===================================================== - -Tensor& exponential_(Tensor& self, double lambda, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::exponential_(self, lambda, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::exponential_(self, lambda, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// =============================================== Random permutation ================================================= - -// randperm implementation was copied from PyTorch to unblock CSPRNG users, but ultimately CSPRNG must reuse -// refactored randperm from PyTorch, see https://github.com/pytorch/pytorch/issues/43816 - -namespace { - - inline void 
check_supported_max_int_with_precision(int64_t n, const Tensor& tensor) { - TORCH_CHECK(at::scalar_tensor(n, tensor.options()).defined(), - "n is too large for result tensor type: '", tensor.toString(), "'"); - - // Ensure sufficient precision for floating point representation. - switch (tensor.scalar_type()) { - case at::ScalarType::Half: - TORCH_CHECK(n <= (int64_t(1) << 11) + 1, "n cannot be greater than 2049 for Half type."); - break; - case at::ScalarType::Float: - TORCH_CHECK(n <= (int64_t(1) << 24) + 1, "n cannot be greater than 2^24+1 for Float type."); - break; - case at::ScalarType::Double: // Unlikely to happen, but doesn't hurt to check - TORCH_CHECK(n <= (int64_t(1) << 53) + 1, "n cannot be greater than 2^53+1 for Double type."); - break; - default: - break; - } - } - - template - void randperm(Tensor& result, int64_t n, c10::optional generator) { - auto gen = at::check_generator(generator); - scalar_t *r__data = result.data_ptr(); - - result.resize_({n}); - int64_t r__stride_0 = result.stride(0); - - at::parallel_for(0, n, internal::GRAIN_SIZE, - [&r__data, &r__stride_0](int64_t p_begin, int64_t p_end) { - for(int64_t i = p_begin; i < p_end; i++) - r__data[i*r__stride_0] = static_cast(i); - }); - - for(int64_t i = 0; i < n - 1; i++) - { - int64_t z = gen->random() % (n-i); - scalar_t sav = r__data[i*r__stride_0]; - r__data[i*r__stride_0] = r__data[(z+i)*r__stride_0]; - r__data[(z+i)*r__stride_0] = sav; - } - } -} // namespace - -Tensor& randperm_generator_out(int64_t n, c10::optional generator, Tensor& result) { - TORCH_CHECK(n >= 0, "n must be non-negative, got", n); - check_supported_max_int_with_precision(n, result); - if (result.device().type() == at::kCUDA) { - auto result_cpu = at::empty({n}, result.options().device(kCPU)); - randperm_generator_out(n, generator, result_cpu); - result.resize_({n}); - return result.copy_(result_cpu); - } - result.resize_({n}); - // See Note [Acquire lock when using random generators] - std::lock_guard 
lock(generator->mutex()); - AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, result.scalar_type(), "randperm", [&]() -> void { - randperm(result, n, generator); - }); - return result; -} - -// ================================================Encrypt/Decrypt===================================================== - -Tensor encrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode) { - if (input.device().type() == DeviceType::CPU) { - return cpu::encrypt(input, output, key, cipher, mode); -#ifdef WITH_CUDA - } else if (input.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::encrypt(input, output, key, cipher, mode); -#endif - } else { - TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); - } -} - -Tensor decrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode) { - if (input.device().type() == DeviceType::CPU) { - return cpu::decrypt(input, output, key, cipher, mode); -#ifdef WITH_CUDA - } else if (input.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::decrypt(input, output, key, cipher, mode); -#endif - } else { - TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); - } -} - -// ==================================================================================================================== - -Generator create_random_device_generator(c10::optional token = c10::nullopt) { - if (token.has_value()) { - return make_generator(*token); - } else { - return make_generator(true); - } -} - -Generator create_mt19937_generator(c10::optional seed = c10::nullopt) { - if (seed.has_value()) { - return make_generator(*seed); - } else { - return make_generator(false); - } -} - -bool supports_cuda() { -#ifdef WITH_CUDA - return true; -#else - return false; -#endif -} - -TORCH_LIBRARY_IMPL(aten, CustomRNGKeyId, m) { - // Random - m.impl("random_.from", random_from_to); - m.impl("random_.to", random_to); - m.impl("random_", random_); - // 
Uniform - m.impl("uniform_", uniform_); - // Normal - m.impl("normal_", normal_); - m.impl("normal.Tensor_float_out", normal_Tensor_float_out); - m.impl("normal.float_Tensor_out", normal_float_Tensor_out); - m.impl("normal.Tensor_Tensor_out", normal_Tensor_Tensor_out); - m.impl("normal.Tensor_float", normal_Tensor_float); - m.impl("normal.float_Tensor", normal_float_Tensor); - m.impl("normal.Tensor_Tensor", normal_Tensor_Tensor); - // Cauchy - m.impl("cauchy_", cauchy_); - // LogNormal - m.impl("log_normal_", log_normal_); - // Geometric - m.impl("geometric_", geometric_); - // Exponential - m.impl("exponential_", exponential_); - // Random permutation - m.impl("randperm.generator_out", randperm_generator_out); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("supports_cuda", &supports_cuda); - m.def("create_random_device_generator", &create_random_device_generator, py::arg("token") = nullptr); - m.def("create_mt19937_generator", &create_mt19937_generator, py::arg("seed") = nullptr); - m.def("encrypt", &encrypt_pybind); - m.def("decrypt", &decrypt_pybind); -} diff --git a/torchcsprng/csrc/cuda/kernels.cu b/torchcsprng/csrc/cuda/kernels.cu deleted file mode 100644 index da57d0a..0000000 --- a/torchcsprng/csrc/cuda/kernels.cu +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../kernels_commons.h" - -namespace torch { -namespace csprng { -namespace cuda { - -#include "../kernels_body.inc" - -}}} diff --git a/torchcsprng/csrc/cuda/kernels.cuh b/torchcsprng/csrc/cuda/kernels.cuh deleted file mode 100644 index 586f1fd..0000000 --- a/torchcsprng/csrc/cuda/kernels.cuh +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 
- * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include - -namespace torch { -namespace csprng { -namespace cuda { - -#include "../kernels_decls.inc" - -}}} diff --git a/torchcsprng/csrc/kernels_body.inc b/torchcsprng/csrc/kernels_body.inc deleted file mode 100644 index 097ae09..0000000 --- a/torchcsprng/csrc/kernels_body.inc +++ /dev/null @@ -1,437 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "aes.inc" - -// Generates `block_t_size`-bytes random key Tensor on CPU -// using `generator`, which must be an instance of `at::CPUGeneratorImpl` -// and passes it to the `device`. -template -at::Tensor key_tensor(size_t block_t_size, c10::optional generator) { - std::lock_guard lock(generator->mutex()); - auto gen = at::check_generator(generator); - auto key = torch::empty({static_cast(block_t_size)}, torch::kUInt8); - using random_t = typename std::result_of::type; - constexpr size_t random_t_size = sizeof(random_t); - for (size_t i = 0; i < block_t_size / random_t_size; i++) { - const auto rand = gen->random(); - for (size_t j = 0; j < random_t_size; j++) { - size_t k = i * random_t_size + j; - key[k] = static_cast((rand >> (j * 8)) & 0xff); - } - } - return key; -} - -template -at::Tensor aes128_key_tensor(at::Generator generator) { - return key_tensor(aes::block_t_size, generator); -} - -// ==================================================================================================================== - -// A simple container for random state sub-blocks that implements RNG interface -// with random() and random64() methods, that are used by transformation function -template -struct RNGValues { - TORCH_CSPRNG_HOST_DEVICE RNGValues(uint64_t* vals) { - 
memcpy(&vals_, vals, size * sizeof(uint64_t)); - } - uint32_t TORCH_CSPRNG_HOST_DEVICE random() { auto res = static_cast(vals_[index]); index++; return res; } - uint64_t TORCH_CSPRNG_HOST_DEVICE random64() { auto res = vals_[index]; index++; return res; } -private: - uint64_t vals_[size]; - int index = 0; -}; - -// Applies AES in CTR mode with the `key` for passed TensorIterator iter. -// `scalar_t` is a scalar type equivalent of target tensor dtype -// `uint_t` is an unsigned integral type of sub-blocks that random state is divided to -// (e.g, 16 bytes random state block can be divided into 16 uint8_t sub-blocks -// or 8 uint16_t sub-block or 4 uint32_t sub-block or 2 uint64_t sub-blocks) -// `N` is a number of sub-block which is used by `transform_func` -// to generate a random value of specific distribution (e.g. `normal` uses 2) -// `key` is a CUDA pointer to random key memory block -// `transform_func` is a callable that converts N `uint_t` random state sub-blocks passed in RNGValues into target dtype `scalar_t` -template -void aes_helper(at::TensorIterator& iter, const uint8_t* key_bytes, transform_t transform_func) { - auto output = iter.tensor(0); - const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); - const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE (uint32_t li) -> uint32_t { - return output_offset_calc.get(li)[0]; - }; - torch::csprng::block_cipher( - nullptr, 0, 0, output_index_calc, - output.data_ptr(), output.numel(), output.element_size(), output_index_calc, - iter.device_type(), - [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { - uint8_t idx_block[aes::block_t_size]; - std::memset(&idx_block, 0, aes::block_t_size); - *(reinterpret_cast(idx_block)) = idx; - aes::encrypt(idx_block, key_bytes); - for (size_t i = 0; i < aes::block_t_size; i++) { - block[i] ^= idx_block[i]; - } - }, - aes::block_t_size / (N * sizeof(uint_t)), - [transform_func] 
TORCH_CSPRNG_HOST_DEVICE (uint8_t* block) { - const auto n = aes::block_t_size / (N * sizeof(uint_t)); - for (size_t i = 0; i < n; ++i) { - uint64_t vals[N]; - for (size_t j = 0; j < N; ++j) { - vals[j] = (reinterpret_cast(block))[N * i + j]; - } - RNGValues rng(vals); - reinterpret_cast(block)[i] = transform_func(&rng); - } - } - ); -} - -// ==================================================================================================================== - -// A mapping between scalar type and corresponding unsigned integer type of random state sub-block. -// uint64_t for double and long, uint32_t for the rest -template -struct UIntType {}; - -template <> struct UIntType { using type = uint64_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint16_t; }; -template <> struct UIntType { using type = uint16_t; }; -template <> struct UIntType { using type = uint64_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; - -// ==================================================== Random ======================================================== - -template -struct RandomKernel { - void operator()(TensorIterator& iter, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_kernel", [&] { - aes_helper::type>(iter, key, - [] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_int_distribution random; - return random(generator); - } - ); - }); - } -}; - -template -void random_from_to_kernel_helper(TensorIterator& iter, uint64_t range, int64_t base, const uint8_t* key) 
{ - aes_helper(iter, key, - [range, base] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_int_from_to_distribution random(range, base); - return random(generator); - } - ); -} - -template -void random_full_range_kernel_helper(TensorIterator& iter, const uint8_t* key) { - aes_helper(iter, key, - [] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_int_full_range_distribution random; - return random(generator); - } - ); -} - -template -struct RandomFromToKernel { - void operator()(TensorIterator& iter, uint64_t range, int64_t base, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_from_to_kernel", [&] { - if (( - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value)/* TODO: && range >= 1ULL << 32*/) - { - random_from_to_kernel_helper(iter, range, base, key); - } else { - random_from_to_kernel_helper(iter, range, base, key); - } - }); - } - void operator()(TensorIterator& iter, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_full_64_bits_range_kernel", [&] { - if (std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value) - { - random_full_range_kernel_helper(iter, key); - } else { - TORCH_CHECK(false, "random_full_64_bits_range_kernel_cuda handles only int64, double, float and bfloat16"); - } - }); - } -}; - -at::Tensor& random_(at::Tensor& self, c10::optional generator) { - return at::native::templates::random_impl(self, generator); -} - -at::Tensor& random_from_to(at::Tensor& self, int64_t from, c10::optional to, c10::optional 
generator) { - return at::native::templates::random_from_to_impl(self, from, to, generator); -} - -at::Tensor& random_to(at::Tensor& self, int64_t to, c10::optional generator) { - return random_from_to(self, 0, to, generator); -} - -// ==================================================== Uniform ======================================================= - -template -struct UniformKernel { - void operator()(TensorIterator& iter, double from, double to, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "uniform_kernel", [&] { - aes_helper(iter, key, - [from, to] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_real_distribution uniform(from, to); - return static_cast(uniform(generator)); - } - ); - }); - } -}; - -at::Tensor& uniform_(at::Tensor& self, double from, double to, c10::optional generator) { - return at::native::templates::uniform_impl_(self, from, to, generator); -} - -// ==================================================== Normal ======================================================== - -template -struct NormalKernel { - void operator()(Tensor& self, double mean, double std, c10::optional generator) { - auto iter = TensorIterator::nullary_op(self); - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "normal_kernel", [&] { - aes_helper(iter, key, - [mean, std] TORCH_CSPRNG_HOST_DEVICE (RNGValues<2>* gen) -> scalar_t { - normal_distribution normal(mean, std); - return static_cast(normal(gen)); - } - ); - }); - } -}; - -at::Tensor& normal_(at::Tensor& self, double mean, double std, c10::optional generator) { - return at::native::templates::normal_impl_(self, mean, std, generator); -} - -at::Tensor& 
normal_Tensor_float_out(at::Tensor& output, const at::Tensor& mean, double std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor& normal_float_Tensor_out(at::Tensor& output, double mean, const at::Tensor& std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor& normal_Tensor_Tensor_out(at::Tensor& output, const at::Tensor& mean, const at::Tensor& std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor normal_Tensor_float(const at::Tensor& mean, double std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -at::Tensor normal_float_Tensor(double mean, const at::Tensor& std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -at::Tensor normal_Tensor_Tensor(const at::Tensor& mean, const at::Tensor& std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -// ==================================================== Cauchy ======================================================== - -template -struct CauchyKernel { - void operator()(TensorIterator& iter, double median, double sigma, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "cauchy_kernel", [&] { - aes_helper(iter, key, - [median, sigma] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { - cauchy_distribution cauchy(median, sigma); - return static_cast(cauchy(gen)); - } - ); - }); - } -}; - -at::Tensor& cauchy_(at::Tensor& self, double median, double sigma, c10::optional generator) { - return at::native::templates::cauchy_impl_(self, median, sigma, generator); -} - -// ================================================== LogNormal 
======================================================= - -template -struct LogNormalKernel { - void operator()(TensorIterator& iter, double mean, double std, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "log_normal", [&] { - aes_helper(iter, key, - [mean, std] TORCH_CSPRNG_HOST_DEVICE (RNGValues<2>* gen) -> scalar_t { - lognormal_distribution logNormal(mean, std); - return static_cast(logNormal(gen)); - } - ); - }); - } -}; - -at::Tensor& log_normal_(at::Tensor& self, double mean, double std, c10::optional gen) { - return at::native::templates::log_normal_impl_(self, mean, std, gen); -} - -// ================================================== Geometric ======================================================= - -template -struct GeometricKernel { - void operator()(TensorIterator& iter, double p, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "geometric_kernel", [&] { - aes_helper::type, 1>(iter, key, - [p] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { - geometric_distribution geometric(p); - return geometric(gen); - } - ); - }); - } -}; - -at::Tensor& geometric_(at::Tensor& self, double p, c10::optional gen) { - return at::native::templates::geometric_impl_(self, p, gen); -} - -// ================================================== Exponential ===================================================== - -template -struct ExponentialKernel { - void operator()(TensorIterator& iter, double lambda, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, 
at::ScalarType::BFloat16, iter.dtype(), "exponential_kernel", [&] { - aes_helper(iter, key, - [lambda] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { - exponential_distribution exponential(lambda); - return static_cast(exponential(gen)); - } - ); - }); - } -}; - -at::Tensor& exponential_(at::Tensor& self, double lambda, c10::optional gen) { - return at::native::templates::exponential_impl_(self, lambda, gen); -} - -// ================================================Encrypt/Decrypt===================================================== - -void check_cipher(const std::string& cipher, Tensor key) { - if (cipher == "aes128") { - TORCH_CHECK(key.element_size() * key.numel() == 16, "key tensor must have 16 bytes(128 bits)"); - } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"aes128\" cipher, \"", cipher, "\" is not supported."); - } -} - -void aes_ecb_encrypt(Tensor input, Tensor output, uint8_t* key_bytes) { - block_cipher(input, output, - [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { - aes::encrypt(block, key_bytes); - } - ); -} - -void aes_ecb_decrypt(Tensor input, Tensor output, uint8_t* key_bytes) { - block_cipher(input, output, - [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { - aes::decrypt(block, key_bytes); - } - ); -} - -void aes_ctr_encrypt(Tensor input, Tensor output, uint8_t* key_bytes) { - block_cipher(input, output, - [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { - uint8_t idx_block[aes::block_t_size]; - std::memset(&idx_block, 0, aes::block_t_size); - *(reinterpret_cast(idx_block)) = idx; - aes::encrypt(idx_block, key_bytes); - for (size_t i = 0; i < aes::block_t_size; i++) { - block[i] ^= idx_block[i]; - } - } - ); -} - -void aes_ctr_decrypt(Tensor input, Tensor output, uint8_t* key_bytes) { - aes_ctr_encrypt(input, output, key_bytes); -} - -Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& 
mode) { - TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), "input, output and key tensors must have the same device"); - const auto output_size_bytes = output.numel() * output.itemsize(); - const auto input_size_bytes = input.numel() * input.itemsize(); - const auto input_size_bytes_rounded = (input_size_bytes + aes::block_t_size - 1) / aes::block_t_size * aes::block_t_size; - TORCH_CHECK(output_size_bytes == input_size_bytes_rounded, - "output size in bytes(", output_size_bytes, - ") is not equal to input size in bytes rounded to block size(", - input_size_bytes_rounded, ")"); - check_cipher(cipher, key); - const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); - if (mode == "ecb") { - aes_ecb_encrypt(input, output, key_bytes); - } else if (mode == "ctr") { - aes_ctr_encrypt(input, output, key_bytes); - } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); - } - return output; -} - -Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode) { - TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), "input, output and key tensors must have the same device"); - const auto output_size_bytes = output.numel() * output.itemsize(); - const auto input_size_bytes = input.numel() * input.itemsize(); - const auto diff = input_size_bytes - output_size_bytes; - TORCH_CHECK(0 <= diff && diff < aes::block_t_size, "output tensor size in bytes must be less then or equal to input tensor size in bytes, the difference must be less than block size"); - TORCH_CHECK(input_size_bytes % aes::block_t_size == 0, "input tensor size in bytes must divisible by cipher block size in bytes"); - check_cipher(cipher, key); - const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); - if (mode == "ecb") { - aes_ecb_decrypt(input, output, key_bytes); - } else if (mode == "ctr") { - aes_ctr_decrypt(input, 
output, key_bytes); - } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); - } - return output; -} diff --git a/torchcsprng/csrc/kernels_commons.h b/torchcsprng/csrc/kernels_commons.h deleted file mode 100644 index 3e74d35..0000000 --- a/torchcsprng/csrc/kernels_commons.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include "macros.h" -#include "block_cipher.h" - -inline uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) { - return (static_cast(hi) << 32) | lo; -} - -// CUDA CSPRNG is actually CPU generator which is used only to generate a random key on CPU for AES running in a block mode on CUDA -struct CSPRNGGeneratorImpl : public c10::GeneratorImpl { - CSPRNGGeneratorImpl(bool use_rd) : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, use_rd_{use_rd} {} - CSPRNGGeneratorImpl(const std::string& token) : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, use_rd_{true}, rd_{token} {} - CSPRNGGeneratorImpl(uint64_t seed) : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, use_rd_{false}, mt_{static_cast(seed)} { } - ~CSPRNGGeneratorImpl() = default; - uint32_t random() { return use_rd_ ? rd_() : mt_(); } - uint64_t random64() { return use_rd_ ? 
make64BitsFrom32Bits(rd_(), rd_()) : make64BitsFrom32Bits(mt_(), mt_()); } - - void set_current_seed(uint64_t seed) override { throw std::runtime_error("not implemented"); } - uint64_t current_seed() const override { throw std::runtime_error("not implemented"); } - uint64_t seed() override { throw std::runtime_error("not implemented"); } - CSPRNGGeneratorImpl* clone_impl() const override { throw std::runtime_error("not implemented"); } - - static at::DeviceType device_type() { return at::DeviceType::CPU; } - - void set_state(const c10::TensorImpl& new_state) override { throw std::runtime_error("not implemented"); } - c10::intrusive_ptr get_state() const override { throw std::runtime_error("not implemented"); } - - void set_offset(uint64_t offset) override { throw std::runtime_error("not implemented"); } - uint64_t get_offset() const override { throw std::runtime_error("not implenented"); } - bool use_rd_; - std::random_device rd_; - std::mt19937 mt_; -}; diff --git a/torchcsprng/csrc/kernels_decls.inc b/torchcsprng/csrc/kernels_decls.inc deleted file mode 100644 index 5fa9299..0000000 --- a/torchcsprng/csrc/kernels_decls.inc +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -// ==================================================== Random ======================================================== - -at::Tensor& random_(at::Tensor& self, c10::optional generator); - -at::Tensor& random_from_to(at::Tensor& self, int64_t from, optional to, c10::optional generator); - -at::Tensor& random_to(at::Tensor& self, int64_t to, c10::optional generator); - -// ==================================================== Uniform ======================================================= - -at::Tensor& uniform_(at::Tensor& self, double from, double to, c10::optional generator); - -// ==================================================== Normal ======================================================== - -at::Tensor& normal_(at::Tensor& self, double mean, double std, c10::optional generator); - -at::Tensor& normal_Tensor_float_out(at::Tensor& output, const at::Tensor& mean, double std, c10::optional gen); - -at::Tensor& normal_float_Tensor_out(at::Tensor& output, double mean, const at::Tensor& std, c10::optional gen); - -at::Tensor& normal_Tensor_Tensor_out(at::Tensor& output, const at::Tensor& mean, const at::Tensor& std, c10::optional gen); - -at::Tensor normal_Tensor_float(const at::Tensor& mean, double std, c10::optional gen); - -at::Tensor normal_float_Tensor(double mean, const at::Tensor& std, c10::optional gen); - -at::Tensor normal_Tensor_Tensor(const at::Tensor& mean, const at::Tensor& std, c10::optional gen); - -// ==================================================== Cauchy ======================================================== - -at::Tensor& cauchy_(at::Tensor& self, double median, double sigma, c10::optional generator); - -// ================================================== LogNormal ======================================================= - -at::Tensor& log_normal_(at::Tensor& self, double mean, double std, c10::optional gen); - -// ================================================== Geometric 
======================================================= - -at::Tensor& geometric_(at::Tensor& self, double p, c10::optional gen); - -// ================================================== Exponential ===================================================== - -at::Tensor& exponential_(at::Tensor& self, double lambda, c10::optional gen); - -// ================================================Encrypt/Decrypt===================================================== - -Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode); - -Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode); diff --git a/torchcsprng/kernels.cu b/torchcsprng/kernels.cu new file mode 100644 index 0000000..65bb01f --- /dev/null +++ b/torchcsprng/kernels.cu @@ -0,0 +1,484 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "kernels_commons.h" + +namespace torch { +namespace csprng { +namespace cuda { + +// The original kernels_body.inc starts here + +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "aes.h" + +// Generates `block_t_size`-bytes random key Tensor on CPU +// using `generator`, which must be an instance of `at::CPUGeneratorImpl` +// and passes it to the `device`. 
+template +at::Tensor key_tensor(size_t block_t_size, c10::optional generator) { + std::lock_guard lock(generator->mutex()); + auto gen = at::check_generator(generator); + auto key = torch::empty({static_cast(block_t_size)}, torch::kUInt8); + using random_t = typename std::result_of::type; + constexpr size_t random_t_size = sizeof(random_t); + for (size_t i = 0; i < block_t_size / random_t_size; i++) { + const auto rand = gen->random(); + for (size_t j = 0; j < random_t_size; j++) { + size_t k = i * random_t_size + j; + key[k] = static_cast((rand >> (j * 8)) & 0xff); + } + } + return key; +} + +template +at::Tensor aes128_key_tensor(at::Generator generator) { + return key_tensor(aes::block_t_size, generator); +} + +// ==================================================================================================================== + +// A simple container for random state sub-blocks that implements RNG interface +// with random() and random64() methods, that are used by transformation function +template +struct RNGValues { + TORCH_CSPRNG_HOST_DEVICE RNGValues(uint64_t *vals) { + memcpy(&vals_, vals, size * sizeof(uint64_t)); + } + uint32_t TORCH_CSPRNG_HOST_DEVICE random() { + auto res = static_cast(vals_[index]); + index++; + return res; + } + uint64_t TORCH_CSPRNG_HOST_DEVICE random64() { + auto res = vals_[index]; + index++; + return res; + } + + private: + uint64_t vals_[size]; + int index = 0; +}; + +// Applies AES in CTR mode with the `key` for passed TensorIterator iter. +// `scalar_t` is a scalar type equivalent of target tensor dtype +// `uint_t` is an unsigned integral type of sub-blocks that random state is divided to +// (e.g, 16 bytes random state block can be divided into 16 uint8_t sub-blocks +// or 8 uint16_t sub-block or 4 uint32_t sub-block or 2 uint64_t sub-blocks) +// `N` is a number of sub-block which is used by `transform_func` +// to generate a random value of specific distribution (e.g. 
`normal` uses 2) +// `key` is a CUDA pointer to random key memory block +// `transform_func` is a callable that converts N `uint_t` random state sub-blocks passed in RNGValues into target dtype +// `scalar_t` +template +void aes_helper(at::TensorIterator &iter, const uint8_t *key_bytes, transform_t transform_func) { + auto output = iter.tensor(0); + const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); + const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE( + uint32_t li) -> uint32_t { return output_offset_calc.get(li)[0]; }; + torch::csprng::block_cipher( + nullptr, 0, 0, output_index_calc, output.data_ptr(), output.numel(), output.element_size(), output_index_calc, + iter.device_type(), + [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { + uint8_t idx_block[aes::block_t_size]; + std::memset(&idx_block, 0, aes::block_t_size); + *(reinterpret_cast(idx_block)) = idx; + aes::encrypt(idx_block, key_bytes); + for (size_t i = 0; i < aes::block_t_size; i++) { + block[i] ^= idx_block[i]; + } + }, + aes::block_t_size / (N * sizeof(uint_t)), + [transform_func] TORCH_CSPRNG_HOST_DEVICE(uint8_t * block) { + const auto n = aes::block_t_size / (N * sizeof(uint_t)); + for (size_t i = 0; i < n; ++i) { + uint64_t vals[N]; + for (size_t j = 0; j < N; ++j) { + vals[j] = (reinterpret_cast(block))[N * i + j]; + } + RNGValues rng(vals); + reinterpret_cast(block)[i] = transform_func(&rng); + } + }); +} + +// ==================================================================================================================== + +// A mapping between scalar type and corresponding unsigned integer type of random state sub-block. 
+// uint64_t for double and long, uint32_t for the rest +template +struct UIntType {}; + +template <> +struct UIntType { + using type = uint64_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; +template <> +struct UIntType { + using type = uint16_t; +}; +template <> +struct UIntType { + using type = uint16_t; +}; +template <> +struct UIntType { + using type = uint64_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; + +// ==================================================== Random ======================================================== + +template +struct RandomKernel { + void operator()(TensorIterator &iter, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_ALL_TYPES_AND3( + at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_kernel", [&] { + aes_helper::type>( + iter, key, [] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { + uniform_int_distribution random; + return random(generator); + }); + }); + } +}; + +template +void random_from_to_kernel_helper(TensorIterator &iter, uint64_t range, int64_t base, const uint8_t *key) { + aes_helper(iter, key, [range, base] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { + uniform_int_from_to_distribution random(range, base); + return random(generator); + }); +} + +template +void random_full_range_kernel_helper(TensorIterator &iter, const uint8_t *key) { + aes_helper(iter, key, [] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { + uniform_int_full_range_distribution random; + return random(generator); + }); +} + +template +struct RandomFromToKernel { + void 
operator()(TensorIterator &iter, uint64_t range, int64_t base, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_ALL_TYPES_AND3( + at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_from_to_kernel", [&] { + if ((std::is_same::value || std::is_same::value || + std::is_same::value || + std::is_same::value) /* TODO: && range >= 1ULL << 32*/) { + random_from_to_kernel_helper(iter, range, base, key); + } else { + random_from_to_kernel_helper(iter, range, base, key); + } + }); + } + void operator()(TensorIterator &iter, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), + "random_full_64_bits_range_kernel", [&] { + if (std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value) { + random_full_range_kernel_helper(iter, key); + } else { + TORCH_CHECK(false, "random_full_64_bits_range_kernel_cuda handles only int64, double, float and bfloat16"); + } + }); + } +}; + +at::Tensor &random_(at::Tensor &self, c10::optional generator) { + return at::native::templates::random_impl(self, generator); +} + +at::Tensor &random_from_to( + at::Tensor &self, int64_t from, c10::optional to, c10::optional generator) { + return at::native::templates::random_from_to_impl(self, from, to, generator); +} + +at::Tensor &random_to(at::Tensor &self, int64_t to, c10::optional generator) { + return random_from_to(self, 0, to, generator); +} + +// ==================================================== Uniform ======================================================= + +template +struct UniformKernel { + void operator()(TensorIterator &iter, double from, double to, c10::optional generator) { + const Tensor key_t = 
aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "uniform_kernel", [&] { + aes_helper( + iter, key, [from, to] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { + uniform_real_distribution uniform(from, to); + return static_cast(uniform(generator)); + }); + }); + } +}; + +at::Tensor &uniform_(at::Tensor &self, double from, double to, c10::optional generator) { + return at::native::templates::uniform_impl_(self, from, to, generator); +} + +// ==================================================== Normal ======================================================== + +template +struct NormalKernel { + void operator()(Tensor &self, double mean, double std, c10::optional generator) { + auto iter = TensorIterator::nullary_op(self); + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "normal_kernel", [&] { + aes_helper( + iter, key, [mean, std] TORCH_CSPRNG_HOST_DEVICE(RNGValues<2> * gen) -> scalar_t { + normal_distribution normal(mean, std); + return static_cast(normal(gen)); + }); + }); + } +}; + +at::Tensor &normal_(at::Tensor &self, double mean, double std, c10::optional generator) { + return at::native::templates::normal_impl_(self, mean, std, generator); +} + +at::Tensor &normal_Tensor_float_out( + at::Tensor &output, const at::Tensor &mean, double std, c10::optional gen) { + return at::native::templates::normal_out_impl(output, mean, std, gen); +} + +at::Tensor &normal_float_Tensor_out( + at::Tensor &output, double mean, const at::Tensor &std, c10::optional gen) { + return at::native::templates::normal_out_impl(output, mean, std, gen); +} + +at::Tensor &normal_Tensor_Tensor_out( + at::Tensor &output, const at::Tensor &mean, const at::Tensor &std, c10::optional gen) { + 
return at::native::templates::normal_out_impl(output, mean, std, gen); +} + +at::Tensor normal_Tensor_float(const at::Tensor &mean, double std, c10::optional gen) { + return at::native::templates::normal_impl(mean, std, gen); +} + +at::Tensor normal_float_Tensor(double mean, const at::Tensor &std, c10::optional gen) { + return at::native::templates::normal_impl(mean, std, gen); +} + +at::Tensor normal_Tensor_Tensor(const at::Tensor &mean, const at::Tensor &std, c10::optional gen) { + return at::native::templates::normal_impl(mean, std, gen); +} + +// ==================================================== Cauchy ======================================================== + +template +struct CauchyKernel { + void operator()(TensorIterator &iter, double median, double sigma, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "cauchy_kernel", [&] { + aes_helper( + iter, key, [median, sigma] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { + cauchy_distribution cauchy(median, sigma); + return static_cast(cauchy(gen)); + }); + }); + } +}; + +at::Tensor &cauchy_(at::Tensor &self, double median, double sigma, c10::optional generator) { + return at::native::templates::cauchy_impl_(self, median, sigma, generator); +} + +// ================================================== LogNormal ======================================================= + +template +struct LogNormalKernel { + void operator()(TensorIterator &iter, double mean, double std, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "log_normal", [&] { + aes_helper( + iter, key, [mean, std] TORCH_CSPRNG_HOST_DEVICE(RNGValues<2> * gen) -> scalar_t { + 
lognormal_distribution logNormal(mean, std); + return static_cast(logNormal(gen)); + }); + }); + } +}; + +at::Tensor &log_normal_(at::Tensor &self, double mean, double std, c10::optional gen) { + return at::native::templates::log_normal_impl_(self, mean, std, gen); +} + +// ================================================== Geometric ======================================================= + +template +struct GeometricKernel { + void operator()(TensorIterator &iter, double p, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "geometric_kernel", [&] { + aes_helper::type, 1>( + iter, key, [p] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { + geometric_distribution geometric(p); + return geometric(gen); + }); + }); + } +}; + +at::Tensor &geometric_(at::Tensor &self, double p, c10::optional gen) { + return at::native::templates::geometric_impl_(self, p, gen); +} + +// ================================================== Exponential ===================================================== + +template +struct ExponentialKernel { + void operator()(TensorIterator &iter, double lambda, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "exponential_kernel", [&] { + aes_helper(iter, key, [lambda] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { + exponential_distribution exponential(lambda); + return static_cast(exponential(gen)); + }); + }); + } +}; + +at::Tensor &exponential_(at::Tensor &self, double lambda, c10::optional gen) { + return at::native::templates::exponential_impl_(self, lambda, gen); +} + +// 
================================================Encrypt/Decrypt===================================================== + +void check_cipher(const std::string &cipher, Tensor key) { + if (cipher == "aes128") { + TORCH_CHECK(key.element_size() * key.numel() == 16, "key tensor must have 16 bytes(128 bits)"); + } else { + TORCH_CHECK(false, "encrypt/decrypt supports \"aes128\" cipher, \"", cipher, "\" is not supported."); + } +} + +void aes_ecb_encrypt(Tensor input, Tensor output, uint8_t *key_bytes) { + block_cipher(input, output, + [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { aes::encrypt(block, key_bytes); }); +} + +void aes_ecb_decrypt(Tensor input, Tensor output, uint8_t *key_bytes) { + block_cipher(input, output, + [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { aes::decrypt(block, key_bytes); }); +} + +void aes_ctr_encrypt(Tensor input, Tensor output, uint8_t *key_bytes) { + block_cipher( + input, output, [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { + uint8_t idx_block[aes::block_t_size]; + std::memset(&idx_block, 0, aes::block_t_size); + *(reinterpret_cast(idx_block)) = idx; + aes::encrypt(idx_block, key_bytes); + for (size_t i = 0; i < aes::block_t_size; i++) { + block[i] ^= idx_block[i]; + } + }); +} + +void aes_ctr_decrypt(Tensor input, Tensor output, uint8_t *key_bytes) { + aes_ctr_encrypt(input, output, key_bytes); +} + +Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { + TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), + "input, output and key tensors must have the same device"); + const auto output_size_bytes = output.numel() * output.itemsize(); + const auto input_size_bytes = input.numel() * input.itemsize(); + const auto input_size_bytes_rounded = + (input_size_bytes + aes::block_t_size - 1) / aes::block_t_size * aes::block_t_size; + TORCH_CHECK(output_size_bytes == 
input_size_bytes_rounded, "output size in bytes(", output_size_bytes, + ") is not equal to input size in bytes rounded to block size(", input_size_bytes_rounded, ")"); + check_cipher(cipher, key); + const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); + if (mode == "ecb") { + aes_ecb_encrypt(input, output, key_bytes); + } else if (mode == "ctr") { + aes_ctr_encrypt(input, output, key_bytes); + } else { + TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); + } + return output; +} + +Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { + TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), + "input, output and key tensors must have the same device"); + const auto output_size_bytes = output.numel() * output.itemsize(); + const auto input_size_bytes = input.numel() * input.itemsize(); + const auto diff = input_size_bytes - output_size_bytes; + TORCH_CHECK(0 <= diff && diff < aes::block_t_size, + "output tensor size in bytes must be less then or equal to input tensor size in bytes, the difference must be less " + "than block size"); + TORCH_CHECK(input_size_bytes % aes::block_t_size == 0, + "input tensor size in bytes must divisible by cipher block size in bytes"); + check_cipher(cipher, key); + const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); + if (mode == "ecb") { + aes_ecb_decrypt(input, output, key_bytes); + } else if (mode == "ctr") { + aes_ctr_decrypt(input, output, key_bytes); + } else { + TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); + } + return output; +} + +// The original kernels_body.inc ends here + +} // namespace cuda +} // namespace csprng +} // namespace torch diff --git a/torchcsprng/kernels.cuh b/torchcsprng/kernels.cuh new file mode 100644 index 0000000..4467e11 --- /dev/null +++ b/torchcsprng/kernels.cuh @@ -0,0 
+1,84 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace torch { +namespace csprng { +namespace cuda { + +// The original kernels_body.inc starts here + +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +// ==================================================== Random ======================================================== + +at::Tensor &random_(at::Tensor &self, c10::optional generator); + +at::Tensor &random_from_to( + at::Tensor &self, int64_t from, optional to, c10::optional generator); + +at::Tensor &random_to(at::Tensor &self, int64_t to, c10::optional generator); + +// ==================================================== Uniform ======================================================= + +at::Tensor &uniform_(at::Tensor &self, double from, double to, c10::optional generator); + +// ==================================================== Normal ======================================================== + +at::Tensor &normal_(at::Tensor &self, double mean, double std, c10::optional generator); + +at::Tensor &normal_Tensor_float_out( + at::Tensor &output, const at::Tensor &mean, double std, c10::optional gen); + +at::Tensor &normal_float_Tensor_out( + at::Tensor &output, double mean, const at::Tensor &std, c10::optional gen); + +at::Tensor &normal_Tensor_Tensor_out( + at::Tensor &output, const at::Tensor &mean, const at::Tensor &std, c10::optional gen); + +at::Tensor normal_Tensor_float(const at::Tensor &mean, double std, c10::optional gen); + +at::Tensor normal_float_Tensor(double mean, const at::Tensor &std, c10::optional gen); + +at::Tensor normal_Tensor_Tensor(const 
at::Tensor &mean, const at::Tensor &std, c10::optional gen); + +// ==================================================== Cauchy ======================================================== + +at::Tensor &cauchy_(at::Tensor &self, double median, double sigma, c10::optional generator); + +// ================================================== LogNormal ======================================================= + +at::Tensor &log_normal_(at::Tensor &self, double mean, double std, c10::optional gen); + +// ================================================== Geometric ======================================================= + +at::Tensor &geometric_(at::Tensor &self, double p, c10::optional gen); + +// ================================================== Exponential ===================================================== + +at::Tensor &exponential_(at::Tensor &self, double lambda, c10::optional gen); + +// ================================================Encrypt/Decrypt===================================================== + +Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); + +Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); + +// The original kernels_body.inc ends here + +} // namespace cuda +} // namespace csprng +} // namespace torch diff --git a/torchcsprng/kernels_commons.h b/torchcsprng/kernels_commons.h new file mode 100644 index 0000000..359ba2b --- /dev/null +++ b/torchcsprng/kernels_commons.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include "macros.h" +#include "block_cipher.h" + +inline uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) { + return (static_cast(hi) << 32) | lo; +} + +// CUDA CSPRNG is actually CPU generator which is used only to generate a random key on CPU for AES running in a block +// mode on CUDA +struct CSPRNGGeneratorImpl : public c10::GeneratorImpl { + CSPRNGGeneratorImpl(bool use_rd) + : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, + use_rd_{use_rd} {} + CSPRNGGeneratorImpl(const std::string &token) + : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, + use_rd_{true}, + rd_{token} {} + CSPRNGGeneratorImpl(uint64_t seed) + : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, + use_rd_{false}, + mt_{static_cast(seed)} {} + ~CSPRNGGeneratorImpl() = default; + uint32_t random() { + return use_rd_ ? rd_() : mt_(); + } + uint64_t random64() { + return use_rd_ ? 
make64BitsFrom32Bits(rd_(), rd_()) : make64BitsFrom32Bits(mt_(), mt_());
+  }
+
+  void set_current_seed(uint64_t seed) override {
+    throw std::runtime_error("not implemented");
+  }
+  uint64_t current_seed() const override {
+    throw std::runtime_error("not implemented");
+  }
+  uint64_t seed() override {
+    throw std::runtime_error("not implemented");
+  }
+  CSPRNGGeneratorImpl *clone_impl() const override {
+    throw std::runtime_error("not implemented");
+  }
+
+  static at::DeviceType device_type() {
+    return at::DeviceType::CPU;
+  }
+
+  void set_state(const c10::TensorImpl &new_state) override {
+    throw std::runtime_error("not implemented");
+  }
+  c10::intrusive_ptr get_state() const override {
+    throw std::runtime_error("not implemented");
+  }
+
+  void set_offset(uint64_t offset) override {
+    throw std::runtime_error("not implemented");
+  }
+  uint64_t get_offset() const override {
+    throw std::runtime_error("not implemented");
+  }
+  bool use_rd_;
+  std::random_device rd_;
+  std::mt19937 mt_;
+};
diff --git a/torchcsprng/csrc/macros.h b/torchcsprng/macros.h
similarity index 100%
rename from torchcsprng/csrc/macros.h
rename to torchcsprng/macros.h

From 32429599a19768474db1071491921af6f9190bc8 Mon Sep 17 00:00:00 2001
From: myl7
Date: Mon, 11 Dec 2023 16:01:22 +0800
Subject: [PATCH 03/10] Add wrapper and build config

Remove old build config. Move the version file.
--- CMakeLists.txt | 30 ++++ lib.cpp | 28 ++++ lib.h | 8 + setup.py | 193 ------------------------- torchcsprng/block_cipher.h | 2 + version.txt => torchcsprng/version.txt | 0 6 files changed, 68 insertions(+), 193 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 lib.cpp create mode 100644 lib.h delete mode 100644 setup.py rename version.txt => torchcsprng/version.txt (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..b5a151c --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (C) myl7 +# SPDX-License-Identifier: BSD-3-Clause + +cmake_minimum_required(VERSION 3.28) +project(fss-prg-cuda LANGUAGES CUDA CXX) +set(CMAKE_CXX_STANDARD 17) + +include(CheckLanguage) +check_language(CUDA) + +find_package(Torch REQUIRED) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") + +add_library( + fssprgcuda SHARED + lib.cpp + lib.h + torchcsprng/THCIntegerDivider.cuh + torchcsprng/aes.h + torchcsprng/kernels_commons.h + torchcsprng/kernels.cuh + torchcsprng/csprng.cpp + torchcsprng/block_cipher.h + torchcsprng/macros.h + torchcsprng/OffsetCalculator.cuh + torchcsprng/kernels.cu +) +set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +target_link_libraries(fssprgcuda "${TORCH_LIBRARIES}") +target_compile_options(fssprgcuda PRIVATE $<$: --expt-extended-lambda>) diff --git a/lib.cpp b/lib.cpp new file mode 100644 index 0000000..9f0ec19 --- /dev/null +++ b/lib.cpp @@ -0,0 +1,28 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +#include "lib.h" + +#include + +using torch::Tensor; + +extern Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); + +constexpr size_t block_t_size = 16; + +void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key) { + const auto input_size_bytes = buf_size; + TORCH_CHECK(input_size_bytes % block_t_size == 0, "input size in bytes(", input_size_bytes, + ") 
is not a multiple of block size(", block_t_size, ")"); + Tensor input = torch::from_blob(buf, {input_size_bytes}, torch::kUInt8).to(torch::kCUDA); + + const auto output_size_bytes = input_size_bytes; + Tensor output = torch::empty({output_size_bytes}, torch::kUInt8); + + const auto key_size_bytes = 16; + Tensor key_tensor = torch::from_blob(const_cast(key), {key_size_bytes}, torch::kUInt8).to(torch::kCUDA); + + encrypt(input, output, key_tensor, "aes128", "ecb"); + input ^= output; +} diff --git a/lib.h b/lib.h new file mode 100644 index 0000000..287f9ec --- /dev/null +++ b/lib.h @@ -0,0 +1,8 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include + +void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key); diff --git a/setup.py b/setup.py deleted file mode 100644 index 5143b53..0000000 --- a/setup.py +++ /dev/null @@ -1,193 +0,0 @@ -import distutils.command.clean -import glob -import os -import shutil -import subprocess -import sys - -import torch -from setuptools import find_packages, setup -from torch.utils.cpp_extension import ( - BuildExtension, - CppExtension, - CUDA_HOME, - CUDAExtension, -) - -version = open("version.txt", "r").read().strip() -sha = "Unknown" -package_name = "torchcsprng" - -cwd = os.path.dirname(os.path.abspath(__file__)) - -try: - sha = ( - subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=cwd) - .decode("ascii") - .strip() - ) -except Exception: - pass - -if os.getenv("BUILD_VERSION"): - version = os.getenv("BUILD_VERSION") -elif sha != "Unknown": - version += "+" + sha[:7] -print("Building wheel {}-{}".format(package_name, version)) - - -def write_version_file(): - version_path = os.path.join(cwd, "torchcsprng", "version.py") - with open(version_path, "w") as f: - f.write("__version__ = '{}'\n".format(version)) - f.write("git_version = {}\n".format(repr(sha))) - # f.write("from torchcsprng.extension import _check_cuda_version\n") - # f.write("if 
_check_cuda_version() > 0:\n") - # f.write(" cuda = _check_cuda_version()\n") - - -write_version_file() - -with open("README.md", "r") as fh: - long_description = fh.read() - - -requirements = [ - "torch", -] - - -def append_flags(flags, flags_to_append): - for flag in flags_to_append: - if not flag in flags: - flags.append(flag) - return flags - - -def get_extensions(): - build_cuda = torch.cuda.is_available() or os.getenv("FORCE_CUDA", "0") == "1" - - module_name = "torchcsprng" - - extensions_dir = os.path.join(cwd, module_name, "csrc") - - openmp = "ATen parallel backend: OpenMP" in torch.__config__.parallel_info() - - main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) - source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) - - sources = main_file + source_cpu - extension = CppExtension - - define_macros = [] - - cxx_flags = os.getenv("CXX_FLAGS", "") - if cxx_flags == "": - cxx_flags = [] - else: - cxx_flags = cxx_flags.split(" ") - if openmp: - if sys.platform == "linux": - cxx_flags = append_flags(cxx_flags, ["-fopenmp"]) - elif sys.platform == "win32": - cxx_flags = append_flags(cxx_flags, ["/openmp"]) - # elif sys.platform == 'darwin': - # cxx_flags = append_flags(cxx_flags, ['-Xpreprocessor', '-fopenmp']) - - if build_cuda: - extension = CUDAExtension - source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) - sources += source_cuda - - define_macros += [("WITH_CUDA", None)] - - nvcc_flags = os.getenv("NVCC_FLAGS", "") - if nvcc_flags == "": - nvcc_flags = [] - else: - nvcc_flags = nvcc_flags.split(" ") - nvcc_flags = append_flags(nvcc_flags, ["--expt-extended-lambda", "-Xcompiler"]) - extra_compile_args = { - "cxx": cxx_flags, - "nvcc": nvcc_flags, - } - else: - extra_compile_args = { - "cxx": cxx_flags, - } - - ext_modules = [ - extension( - module_name + "._C", - sources, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - ) - ] - - return ext_modules - - -class 
clean(distutils.command.clean.clean): - def run(self): - with open(".gitignore", "r") as f: - ignores = f.read() - start_deleting = False - for wildcard in filter(None, ignores.split("\n")): - if ( - wildcard - == "# do not change or delete this comment - `python setup.py clean` deletes everything after this line" - ): - start_deleting = True - if not start_deleting: - continue - for filename in glob.glob(wildcard): - try: - os.remove(filename) - except OSError: - shutil.rmtree(filename, ignore_errors=True) - - # It's an old-style class in Python 2.7... - distutils.command.clean.clean.run(self) - - -setup( - # Metadata - name=package_name, - version=version, - author="Pavel Belevich", - author_email="pbelevich@fb.com", - url="https://github.com/pytorch/csprng", - description="Cryptographically secure pseudorandom number generators for PyTorch", - long_description=long_description, - long_description_content_type="text/markdown", - license="BSD-3", - # Package info - packages=find_packages(exclude=("test",)), - classifiers=[ - "Intended Audience :: Developers", - "Intended Audience :: Education", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Programming Language :: C++", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Mathematics", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries", - "Topic :: Software Development :: Libraries :: Python Modules", - ], - python_requires=">=3.6", - install_requires=requirements, - ext_modules=get_extensions(), - test_suite="test", - cmdclass={ - "build_ext": BuildExtension, - "clean": clean, - }, -) diff --git a/torchcsprng/block_cipher.h 
b/torchcsprng/block_cipher.h index aeae133..8ab8080 100644 --- a/torchcsprng/block_cipher.h +++ b/torchcsprng/block_cipher.h @@ -60,6 +60,8 @@ TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper(int64_t idx, cip int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { uint8_t block[block_size]; std::memset(&block, 0, block_size); // is it ok to use zeros as padding? + // In this application, we require users to pass in the input that is a multiple of block_size. + // So zero padding never actually happens and it is ok. if (input_ptr != nullptr) { copy_input_to_block(idx, block, block_size, input_ptr, input_numel, input_type_size, input_index_calc); } diff --git a/version.txt b/torchcsprng/version.txt similarity index 100% rename from version.txt rename to torchcsprng/version.txt From cca573341ea8d05ae9a81ccf5af85f50b45bd639 Mon Sep 17 00:00:00 2001 From: myl7 Date: Wed, 3 Jan 2024 20:44:36 +0800 Subject: [PATCH 04/10] Update build config Update gitignore. Update include in code. 
--- .gitignore | 36 ++++++++++++++++++++++++++++++++++++ CMakeLists.txt | 14 +++++++++++++- lib.cpp | 1 - lib.h | 2 +- 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 11d8384..f683121 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,39 @@ +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + # IDE .idea .vscode + +# Build +/build diff --git a/CMakeLists.txt b/CMakeLists.txt index b5a151c..04ea01f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,16 +1,28 @@ # Copyright (C) myl7 # SPDX-License-Identifier: BSD-3-Clause -cmake_minimum_required(VERSION 3.28) +cmake_minimum_required(VERSION 3.22) +# Set env `CUDACXX=/absolute/path/to/nvcc` to enable the CUDA language +# if nvcc is not in the PATH. project(fss-prg-cuda LANGUAGES CUDA CXX) set(CMAKE_CXX_STANDARD 17) include(CheckLanguage) check_language(CUDA) +# Pass `-DCMAKE_PREFIX_PATH=/absolute/path/to/libtorch` to cmake to locate LibTorch. +# If the package cmake config is confused with a soft link from /usr/local/cuda-* to /usr/local/cuda, +# pass `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-*` to cmake to locate CUDA. +# Pass `-DGPU_ARCHS=` to cmake to specify the CPU archtecture. +# You can check the value alternatively at https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ . +# This value set by LibTorch (version that supports CUDA 12.1) has arch that is not supported by CUDA 12.1 instead. 
find_package(Torch REQUIRED) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") +# Fix Python.h not found even though it is just located in /usr/include/python* +find_package(PythonLibs REQUIRED) +include_directories(${PYTHON_INCLUDE_DIRS}) + add_library( fssprgcuda SHARED lib.cpp diff --git a/lib.cpp b/lib.cpp index 9f0ec19..0daa1f2 100644 --- a/lib.cpp +++ b/lib.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: BSD-3-Clause #include "lib.h" - #include using torch::Tensor; diff --git a/lib.h b/lib.h index 287f9ec..b809e76 100644 --- a/lib.h +++ b/lib.h @@ -3,6 +3,6 @@ #pragma once -#include +#include void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key); From 83633aa33e51bca3398ddb01fd8b46f0b9535b14 Mon Sep 17 00:00:00 2001 From: myl7 Date: Thu, 4 Jan 2024 02:01:40 +0800 Subject: [PATCH 05/10] =?UTF-8?q?Impl=20Matyas=E2=80=93Meyer=E2=80=93Oseas?= =?UTF-8?q?=20inside=20kernel?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix cmake language detection. Remove unused code. Only init round key once. 
--- CMakeLists.txt | 16 +- lib.cpp | 1 - torchcsprng/THCIntegerDivider.cuh | 7 +- torchcsprng/{aes.h => aes.cu} | 36 +- torchcsprng/aes.cuh | 21 + .../{block_cipher.h => block_cipher.cuh} | 40 +- torchcsprng/csprng.cpp | 292 ------------ torchcsprng/kernels.cu | 433 +----------------- torchcsprng/kernels.cuh | 53 +-- ...{kernels_commons.h => kernels_commons.cuh} | 4 +- torchcsprng/{macros.h => macros.cuh} | 1 + torchcsprng/owcf.cu | 30 ++ torchcsprng/owcf.cuh | 17 + 13 files changed, 107 insertions(+), 844 deletions(-) rename torchcsprng/{aes.h => aes.cu} (92%) create mode 100644 torchcsprng/aes.cuh rename torchcsprng/{block_cipher.h => block_cipher.cuh} (74%) delete mode 100644 torchcsprng/csprng.cpp rename torchcsprng/{kernels_commons.h => kernels_commons.cuh} (98%) rename torchcsprng/{macros.h => macros.cuh} (94%) create mode 100644 torchcsprng/owcf.cu create mode 100644 torchcsprng/owcf.cuh diff --git a/CMakeLists.txt b/CMakeLists.txt index 04ea01f..1899a4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,15 +27,17 @@ add_library( fssprgcuda SHARED lib.cpp lib.h - torchcsprng/THCIntegerDivider.cuh - torchcsprng/aes.h - torchcsprng/kernels_commons.h + torchcsprng/kernels.cu torchcsprng/kernels.cuh - torchcsprng/csprng.cpp - torchcsprng/block_cipher.h - torchcsprng/macros.h + torchcsprng/macros.cuh + torchcsprng/kernels_commons.cuh + torchcsprng/block_cipher.cuh torchcsprng/OffsetCalculator.cuh - torchcsprng/kernels.cu + torchcsprng/THCIntegerDivider.cuh + torchcsprng/owcf.cu + torchcsprng/owcf.cuh + torchcsprng/aes.cu + torchcsprng/aes.cuh ) set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) target_link_libraries(fssprgcuda "${TORCH_LIBRARIES}") diff --git a/lib.cpp b/lib.cpp index 0daa1f2..dfb406e 100644 --- a/lib.cpp +++ b/lib.cpp @@ -23,5 +23,4 @@ void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint Tensor key_tensor = torch::from_blob(const_cast(key), {key_size_bytes}, 
torch::kUInt8).to(torch::kCUDA); encrypt(input, output, key_tensor, "aes128", "ecb"); - input ^= output; } diff --git a/torchcsprng/THCIntegerDivider.cuh b/torchcsprng/THCIntegerDivider.cuh index b7dfb6a..8e67945 100644 --- a/torchcsprng/THCIntegerDivider.cuh +++ b/torchcsprng/THCIntegerDivider.cuh @@ -8,9 +8,12 @@ #ifndef THC_INTEGER_DIVIDER_INC #define THC_INTEGER_DIVIDER_INC -#include -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) +#include +// #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) +#if defined(__CUDACC__) || defined(__HIPCC__) #include +#else +#error "CUDA not found" #endif // A utility class to implement integer division by multiplication, given a fixed diff --git a/torchcsprng/aes.h b/torchcsprng/aes.cu similarity index 92% rename from torchcsprng/aes.h rename to torchcsprng/aes.cu index 32041f3..38c5c28 100644 --- a/torchcsprng/aes.h +++ b/torchcsprng/aes.cu @@ -5,6 +5,8 @@ * LICENSE file in the root directory of this source tree. */ +#include "aes.cuh" + namespace aes { // This AES implementation is based on @@ -55,8 +57,6 @@ namespace aes { #define Nr 10 // The number of rounds in AES Cipher. #endif -constexpr size_t block_t_size = 16; - typedef uint8_t state_t[4][4]; // The lookup-tables are marked const so they can be placed in read-only storage instead of RAM @@ -102,8 +102,7 @@ TORCH_CSPRNG_CONSTANT const uint8_t Rcon[11] = {0x8d, 0x01, 0x02, 0x04, 0x08, 0x #define getSBoxInvert(num) (rsbox[(num)]) -// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. 
-TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t *RoundKey, const uint8_t *Key) { +void KeyExpansion(uint8_t *RoundKey, const uint8_t *Key) { unsigned int i, j, k; uint8_t tempa[4]; // Used for the column/row operations @@ -312,10 +311,7 @@ TORCH_CSPRNG_HOST_DEVICE void InvShiftRows(state_t *state) { (*state)[3][3] = temp; } -TORCH_CSPRNG_HOST_DEVICE void encrypt(uint8_t *state, const uint8_t *key) { - uint8_t RoundKey[176]; - KeyExpansion(RoundKey, key); - +TORCH_CSPRNG_HOST_DEVICE void encrypt_with_round_key(uint8_t *state, const uint8_t *RoundKey) { uint8_t round = 0; // Add the First round key to the state before starting the rounds. @@ -338,28 +334,4 @@ TORCH_CSPRNG_HOST_DEVICE void encrypt(uint8_t *state, const uint8_t *key) { AddRoundKey(Nr, (state_t *)state, RoundKey); } -TORCH_CSPRNG_HOST_DEVICE void decrypt(uint8_t *state, const uint8_t *key) { - uint8_t RoundKey[176]; - KeyExpansion(RoundKey, key); - - uint8_t round = 0; - - // Add the First round key to the state before starting the rounds. - AddRoundKey(Nr, (state_t *)state, RoundKey); - - // There will be Nr rounds. - // The first Nr-1 rounds are identical. - // These Nr rounds are executed in the loop below. 
- // Last one without InvMixColumn() - for (round = (Nr - 1);; --round) { - InvShiftRows((state_t *)state); - InvSubBytes((state_t *)state); - AddRoundKey(round, (state_t *)state, RoundKey); - if (round == 0) { - break; - } - InvMixColumns((state_t *)state); - } -} - } // namespace aes diff --git a/torchcsprng/aes.cuh b/torchcsprng/aes.cuh new file mode 100644 index 0000000..e3e2eab --- /dev/null +++ b/torchcsprng/aes.cuh @@ -0,0 +1,21 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include +#include +#include "macros.cuh" + +namespace aes { + +constexpr size_t block_t_size = 16; +constexpr size_t round_key_t_size = 176; + +TORCH_CSPRNG_HOST_DEVICE void encrypt_with_round_key(uint8_t *state, const uint8_t *RoundKey); +// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. +// +// In our usecase, not run on GPU since it is only run once. +void KeyExpansion(uint8_t *RoundKey, const uint8_t *Key); + +} // namespace aes diff --git a/torchcsprng/block_cipher.h b/torchcsprng/block_cipher.cuh similarity index 74% rename from torchcsprng/block_cipher.h rename to torchcsprng/block_cipher.cuh index 8ab8080..d36bf94 100644 --- a/torchcsprng/block_cipher.h +++ b/torchcsprng/block_cipher.cuh @@ -7,7 +7,7 @@ #pragma once -#include "macros.h" +#include "macros.cuh" #include #include #include "OffsetCalculator.cuh" @@ -18,11 +18,14 @@ #if defined(__CUDACC__) || defined(__HIPCC__) #include #include +#else +#error "CUDA not found" #endif #if defined(__CUDACC__) || defined(__HIPCC__) #define UNROLL_IF_CUDA #pragma unroll #else +#error "CUDA not found" #define UNROLL_IF_CUDA #endif @@ -81,36 +84,10 @@ __global__ static void block_cipher_kernel_cuda(cipher_t cipher, int output_elem block_cipher_kernel_helper(idx, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform); } +#else +#error 
"CUDA not found" #endif -template -static void block_cipher_kernel_cpu_serial(int64_t begin, int64_t end, cipher_t cipher, int output_elem_per_block, - void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, - int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { - for (auto idx = begin; idx < end; ++idx) { - block_cipher_kernel_helper(idx, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, - input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform); - } -} - -template -static void block_cipher_kernel_cpu(int64_t total, cipher_t cipher, int output_elem_per_block, void *input_ptr, - int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, int64_t output_numel, - int output_type_size, output_index_calc_t output_index_calc, transform_t transform_func) { - if (total < at::internal::GRAIN_SIZE || at::get_num_threads() == 1) { - block_cipher_kernel_cpu_serial(0, total, cipher, output_elem_per_block, input_ptr, input_numel, - input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform_func); - } else { - at::parallel_for(0, total, at::internal::GRAIN_SIZE, [&](int64_t begin, int64_t end) { - block_cipher_kernel_cpu_serial(begin, end, cipher, output_elem_per_block, input_ptr, input_numel, - input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, - transform_func); - }); - } -} - template void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, @@ -121,9 +98,7 @@ void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, inp } if (device.type() == at::kCPU) { - const auto total = (output_numel + output_elem_per_block - 1) / output_elem_per_block; - block_cipher_kernel_cpu(total, cipher, output_elem_per_block, 
input_ptr, input_numel, input_type_size, - input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform_func); + TORCH_CHECK(false, "torchcsprng was compiled with only CUDA support"); } else if (device.type() == at::kCUDA) { #if defined(__CUDACC__) || defined(__HIPCC__) const auto threads = 256; @@ -134,6 +109,7 @@ void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, inp transform_func); AT_CUDA_CHECK(cudaGetLastError()); #else +#error "CUDA not found" TORCH_CHECK(false, "torchcsprng was compiled without CUDA support"); #endif } else { diff --git a/torchcsprng/csprng.cpp b/torchcsprng/csprng.cpp deleted file mode 100644 index 26c527d..0000000 --- a/torchcsprng/csprng.cpp +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include - -#include -#include -#include - -#include "kernels_commons.h" -#include "kernels.cuh" - -using namespace at; -using namespace torch::csprng; - -static const auto GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE = "generator does not support tensor device type"; -static const auto TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED = "tensor device type is not supported"; - -// ==================================================== Random ======================================================== - -Tensor &random_(Tensor &self, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_(self, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor &random_from_to(Tensor &self, int64_t from, optional to, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_from_to(self, from, to, gen); - } else { - TORCH_CHECK(false, 
GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor &random_to(Tensor &self, int64_t to, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_to(self, to, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Uniform ======================================================= - -Tensor &uniform_(Tensor &self, double from, double to, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::uniform_(self, from, to, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Normal ======================================================== - -Tensor &normal_(Tensor &self, double mean, double std, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_(self, mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor &normal_Tensor_float_out(const Tensor &mean, double std, c10::optional gen, Tensor &output) { - if (output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_float_out(output, mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor &normal_float_Tensor_out(double mean, const Tensor &std, c10::optional gen, Tensor &output) { - if (output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_float_Tensor_out(output, mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor &normal_Tensor_Tensor_out(const Tensor &mean, const Tensor &std, c10::optional gen, Tensor &output) { - if (output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_Tensor_out(output, mean, std, 
gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_Tensor_float(const Tensor &mean, double std, c10::optional gen) { - if (mean.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_float(mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_float_Tensor(double mean, const Tensor &std, c10::optional gen) { - if (std.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_float_Tensor(mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_Tensor_Tensor(const Tensor &mean, const Tensor &std, c10::optional gen) { - if (mean.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_Tensor(mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Cauchy ======================================================== - -Tensor &cauchy_(Tensor &self, double median, double sigma, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::cauchy_(self, median, sigma, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== LogNormal ======================================================= - -Tensor &log_normal_(Tensor &self, double mean, double std, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::log_normal_(self, mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== Geometric ======================================================= - -Tensor &geometric_(Tensor &self, double p, c10::optional gen) { - if (self.device().type() == 
DeviceType::CUDA) { - return torch::csprng::cuda::geometric_(self, p, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== Exponential ===================================================== - -Tensor &exponential_(Tensor &self, double lambda, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::exponential_(self, lambda, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// =============================================== Random permutation ================================================= - -// randperm implementation was copied from PyTorch to unblock CSPRNG users, but ultimately CSPRNG must reuse -// refactored randperm from PyTorch, see https://github.com/pytorch/pytorch/issues/43816 - -namespace { - -inline void check_supported_max_int_with_precision(int64_t n, const Tensor &tensor) { - TORCH_CHECK(at::scalar_tensor(n, tensor.options()).defined(), "n is too large for result tensor type: '", - tensor.toString(), "'"); - - // Ensure sufficient precision for floating point representation. 
- switch (tensor.scalar_type()) { - case at::ScalarType::Half: - TORCH_CHECK(n <= (int64_t(1) << 11) + 1, "n cannot be greater than 2049 for Half type."); - break; - case at::ScalarType::Float: - TORCH_CHECK(n <= (int64_t(1) << 24) + 1, "n cannot be greater than 2^24+1 for Float type."); - break; - case at::ScalarType::Double: // Unlikely to happen, but doesn't hurt to check - TORCH_CHECK(n <= (int64_t(1) << 53) + 1, "n cannot be greater than 2^53+1 for Double type."); - break; - default: - break; - } -} - -template -void randperm(Tensor &result, int64_t n, c10::optional generator) { - auto gen = at::check_generator(generator); - scalar_t *r__data = result.data_ptr(); - - result.resize_({n}); - int64_t r__stride_0 = result.stride(0); - - at::parallel_for(0, n, internal::GRAIN_SIZE, [&r__data, &r__stride_0](int64_t p_begin, int64_t p_end) { - for (int64_t i = p_begin; i < p_end; i++) r__data[i * r__stride_0] = static_cast(i); - }); - - for (int64_t i = 0; i < n - 1; i++) { - int64_t z = gen->random() % (n - i); - scalar_t sav = r__data[i * r__stride_0]; - r__data[i * r__stride_0] = r__data[(z + i) * r__stride_0]; - r__data[(z + i) * r__stride_0] = sav; - } -} -} // namespace - -Tensor &randperm_generator_out(int64_t n, c10::optional generator, Tensor &result) { - TORCH_CHECK(n >= 0, "n must be non-negative, got", n); - check_supported_max_int_with_precision(n, result); - if (result.device().type() == at::kCUDA) { - auto result_cpu = at::empty({n}, result.options().device(kCPU)); - randperm_generator_out(n, generator, result_cpu); - result.resize_({n}); - return result.copy_(result_cpu); - } - result.resize_({n}); - // See Note [Acquire lock when using random generators] - std::lock_guard lock(generator->mutex()); - AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, result.scalar_type(), "randperm", - [&]() -> void { randperm(result, n, generator); }); - return result; -} - -// 
================================================Encrypt/Decrypt===================================================== - -Tensor encrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { - if (input.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::encrypt(input, output, key, cipher, mode); - } else { - TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); - } -} - -Tensor decrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { - if (input.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::decrypt(input, output, key, cipher, mode); - } else { - TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); - } -} - -// ==================================================================================================================== - -Generator create_random_device_generator(c10::optional token = c10::nullopt) { - if (token.has_value()) { - return make_generator(*token); - } else { - return make_generator(true); - } -} - -Generator create_mt19937_generator(c10::optional seed = c10::nullopt) { - if (seed.has_value()) { - return make_generator(*seed); - } else { - return make_generator(false); - } -} - -bool supports_cuda() { -#ifdef WITH_CUDA - return true; -#else - return false; -#endif -} - -TORCH_LIBRARY_IMPL(aten, CustomRNGKeyId, m) { - // Random - m.impl("random_.from", random_from_to); - m.impl("random_.to", random_to); - m.impl("random_", random_); - // Uniform - m.impl("uniform_", uniform_); - // Normal - m.impl("normal_", normal_); - m.impl("normal.Tensor_float_out", normal_Tensor_float_out); - m.impl("normal.float_Tensor_out", normal_float_Tensor_out); - m.impl("normal.Tensor_Tensor_out", normal_Tensor_Tensor_out); - m.impl("normal.Tensor_float", normal_Tensor_float); - m.impl("normal.float_Tensor", normal_float_Tensor); - m.impl("normal.Tensor_Tensor", normal_Tensor_Tensor); - // Cauchy - m.impl("cauchy_", cauchy_); - 
// LogNormal - m.impl("log_normal_", log_normal_); - // Geometric - m.impl("geometric_", geometric_); - // Exponential - m.impl("exponential_", exponential_); - // Random permutation - m.impl("randperm.generator_out", randperm_generator_out); -} diff --git a/torchcsprng/kernels.cu b/torchcsprng/kernels.cu index 65bb01f..f55153d 100644 --- a/torchcsprng/kernels.cu +++ b/torchcsprng/kernels.cu @@ -5,7 +5,9 @@ * LICENSE file in the root directory of this source tree. */ -#include "kernels_commons.h" +#include "kernels_commons.cuh" +#include "aes.cuh" +#include "owcf.cuh" namespace torch { namespace csprng { @@ -20,382 +22,6 @@ namespace cuda { * LICENSE file in the root directory of this source tree. */ -#include "aes.h" - -// Generates `block_t_size`-bytes random key Tensor on CPU -// using `generator`, which must be an instance of `at::CPUGeneratorImpl` -// and passes it to the `device`. -template -at::Tensor key_tensor(size_t block_t_size, c10::optional generator) { - std::lock_guard lock(generator->mutex()); - auto gen = at::check_generator(generator); - auto key = torch::empty({static_cast(block_t_size)}, torch::kUInt8); - using random_t = typename std::result_of::type; - constexpr size_t random_t_size = sizeof(random_t); - for (size_t i = 0; i < block_t_size / random_t_size; i++) { - const auto rand = gen->random(); - for (size_t j = 0; j < random_t_size; j++) { - size_t k = i * random_t_size + j; - key[k] = static_cast((rand >> (j * 8)) & 0xff); - } - } - return key; -} - -template -at::Tensor aes128_key_tensor(at::Generator generator) { - return key_tensor(aes::block_t_size, generator); -} - -// ==================================================================================================================== - -// A simple container for random state sub-blocks that implements RNG interface -// with random() and random64() methods, that are used by transformation function -template -struct RNGValues { - TORCH_CSPRNG_HOST_DEVICE RNGValues(uint64_t *vals) { - 
memcpy(&vals_, vals, size * sizeof(uint64_t)); - } - uint32_t TORCH_CSPRNG_HOST_DEVICE random() { - auto res = static_cast(vals_[index]); - index++; - return res; - } - uint64_t TORCH_CSPRNG_HOST_DEVICE random64() { - auto res = vals_[index]; - index++; - return res; - } - - private: - uint64_t vals_[size]; - int index = 0; -}; - -// Applies AES in CTR mode with the `key` for passed TensorIterator iter. -// `scalar_t` is a scalar type equivalent of target tensor dtype -// `uint_t` is an unsigned integral type of sub-blocks that random state is divided to -// (e.g, 16 bytes random state block can be divided into 16 uint8_t sub-blocks -// or 8 uint16_t sub-block or 4 uint32_t sub-block or 2 uint64_t sub-blocks) -// `N` is a number of sub-block which is used by `transform_func` -// to generate a random value of specific distribution (e.g. `normal` uses 2) -// `key` is a CUDA pointer to random key memory block -// `transform_func` is a callable that converts N `uint_t` random state sub-blocks passed in RNGValues into target dtype -// `scalar_t` -template -void aes_helper(at::TensorIterator &iter, const uint8_t *key_bytes, transform_t transform_func) { - auto output = iter.tensor(0); - const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); - const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE( - uint32_t li) -> uint32_t { return output_offset_calc.get(li)[0]; }; - torch::csprng::block_cipher( - nullptr, 0, 0, output_index_calc, output.data_ptr(), output.numel(), output.element_size(), output_index_calc, - iter.device_type(), - [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { - uint8_t idx_block[aes::block_t_size]; - std::memset(&idx_block, 0, aes::block_t_size); - *(reinterpret_cast(idx_block)) = idx; - aes::encrypt(idx_block, key_bytes); - for (size_t i = 0; i < aes::block_t_size; i++) { - block[i] ^= idx_block[i]; - } - }, - aes::block_t_size / (N * sizeof(uint_t)), - 
[transform_func] TORCH_CSPRNG_HOST_DEVICE(uint8_t * block) { - const auto n = aes::block_t_size / (N * sizeof(uint_t)); - for (size_t i = 0; i < n; ++i) { - uint64_t vals[N]; - for (size_t j = 0; j < N; ++j) { - vals[j] = (reinterpret_cast(block))[N * i + j]; - } - RNGValues rng(vals); - reinterpret_cast(block)[i] = transform_func(&rng); - } - }); -} - -// ==================================================================================================================== - -// A mapping between scalar type and corresponding unsigned integer type of random state sub-block. -// uint64_t for double and long, uint32_t for the rest -template -struct UIntType {}; - -template <> -struct UIntType { - using type = uint64_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; -template <> -struct UIntType { - using type = uint16_t; -}; -template <> -struct UIntType { - using type = uint16_t; -}; -template <> -struct UIntType { - using type = uint64_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; - -// ==================================================== Random ======================================================== - -template -struct RandomKernel { - void operator()(TensorIterator &iter, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3( - at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_kernel", [&] { - aes_helper::type>( - iter, key, [] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { - uniform_int_distribution random; - return random(generator); - }); - }); - } -}; - -template -void random_from_to_kernel_helper(TensorIterator 
&iter, uint64_t range, int64_t base, const uint8_t *key) { - aes_helper(iter, key, [range, base] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { - uniform_int_from_to_distribution random(range, base); - return random(generator); - }); -} - -template -void random_full_range_kernel_helper(TensorIterator &iter, const uint8_t *key) { - aes_helper(iter, key, [] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { - uniform_int_full_range_distribution random; - return random(generator); - }); -} - -template -struct RandomFromToKernel { - void operator()(TensorIterator &iter, uint64_t range, int64_t base, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3( - at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_from_to_kernel", [&] { - if ((std::is_same::value || std::is_same::value || - std::is_same::value || - std::is_same::value) /* TODO: && range >= 1ULL << 32*/) { - random_from_to_kernel_helper(iter, range, base, key); - } else { - random_from_to_kernel_helper(iter, range, base, key); - } - }); - } - void operator()(TensorIterator &iter, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), - "random_full_64_bits_range_kernel", [&] { - if (std::is_same::value || std::is_same::value || - std::is_same::value || std::is_same::value) { - random_full_range_kernel_helper(iter, key); - } else { - TORCH_CHECK(false, "random_full_64_bits_range_kernel_cuda handles only int64, double, float and bfloat16"); - } - }); - } -}; - -at::Tensor &random_(at::Tensor &self, c10::optional generator) { - return at::native::templates::random_impl(self, generator); -} - -at::Tensor &random_from_to( - at::Tensor &self, 
int64_t from, c10::optional to, c10::optional generator) { - return at::native::templates::random_from_to_impl(self, from, to, generator); -} - -at::Tensor &random_to(at::Tensor &self, int64_t to, c10::optional generator) { - return random_from_to(self, 0, to, generator); -} - -// ==================================================== Uniform ======================================================= - -template -struct UniformKernel { - void operator()(TensorIterator &iter, double from, double to, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2( - at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "uniform_kernel", [&] { - aes_helper( - iter, key, [from, to] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { - uniform_real_distribution uniform(from, to); - return static_cast(uniform(generator)); - }); - }); - } -}; - -at::Tensor &uniform_(at::Tensor &self, double from, double to, c10::optional generator) { - return at::native::templates::uniform_impl_(self, from, to, generator); -} - -// ==================================================== Normal ======================================================== - -template -struct NormalKernel { - void operator()(Tensor &self, double mean, double std, c10::optional generator) { - auto iter = TensorIterator::nullary_op(self); - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "normal_kernel", [&] { - aes_helper( - iter, key, [mean, std] TORCH_CSPRNG_HOST_DEVICE(RNGValues<2> * gen) -> scalar_t { - normal_distribution normal(mean, std); - return static_cast(normal(gen)); - }); - }); - } -}; - -at::Tensor &normal_(at::Tensor &self, double mean, double std, c10::optional generator) { - return at::native::templates::normal_impl_(self, 
mean, std, generator); -} - -at::Tensor &normal_Tensor_float_out( - at::Tensor &output, const at::Tensor &mean, double std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor &normal_float_Tensor_out( - at::Tensor &output, double mean, const at::Tensor &std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor &normal_Tensor_Tensor_out( - at::Tensor &output, const at::Tensor &mean, const at::Tensor &std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor normal_Tensor_float(const at::Tensor &mean, double std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -at::Tensor normal_float_Tensor(double mean, const at::Tensor &std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -at::Tensor normal_Tensor_Tensor(const at::Tensor &mean, const at::Tensor &std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -// ==================================================== Cauchy ======================================================== - -template -struct CauchyKernel { - void operator()(TensorIterator &iter, double median, double sigma, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "cauchy_kernel", [&] { - aes_helper( - iter, key, [median, sigma] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { - cauchy_distribution cauchy(median, sigma); - return static_cast(cauchy(gen)); - }); - }); - } -}; - -at::Tensor &cauchy_(at::Tensor &self, double median, double sigma, c10::optional generator) { - return at::native::templates::cauchy_impl_(self, median, sigma, generator); -} - -// 
================================================== LogNormal ======================================================= - -template -struct LogNormalKernel { - void operator()(TensorIterator &iter, double mean, double std, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "log_normal", [&] { - aes_helper( - iter, key, [mean, std] TORCH_CSPRNG_HOST_DEVICE(RNGValues<2> * gen) -> scalar_t { - lognormal_distribution logNormal(mean, std); - return static_cast(logNormal(gen)); - }); - }); - } -}; - -at::Tensor &log_normal_(at::Tensor &self, double mean, double std, c10::optional gen) { - return at::native::templates::log_normal_impl_(self, mean, std, gen); -} - -// ================================================== Geometric ======================================================= - -template -struct GeometricKernel { - void operator()(TensorIterator &iter, double p, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2( - at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "geometric_kernel", [&] { - aes_helper::type, 1>( - iter, key, [p] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { - geometric_distribution geometric(p); - return geometric(gen); - }); - }); - } -}; - -at::Tensor &geometric_(at::Tensor &self, double p, c10::optional gen) { - return at::native::templates::geometric_impl_(self, p, gen); -} - -// ================================================== Exponential ===================================================== - -template -struct ExponentialKernel { - void operator()(TensorIterator &iter, double lambda, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - 
AT_DISPATCH_FLOATING_TYPES_AND2( - at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "exponential_kernel", [&] { - aes_helper(iter, key, [lambda] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { - exponential_distribution exponential(lambda); - return static_cast(exponential(gen)); - }); - }); - } -}; - -at::Tensor &exponential_(at::Tensor &self, double lambda, c10::optional gen) { - return at::native::templates::exponential_impl_(self, lambda, gen); -} - // ================================================Encrypt/Decrypt===================================================== void check_cipher(const std::string &cipher, Tensor key) { @@ -407,32 +33,14 @@ void check_cipher(const std::string &cipher, Tensor key) { } void aes_ecb_encrypt(Tensor input, Tensor output, uint8_t *key_bytes) { - block_cipher(input, output, - [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { aes::encrypt(block, key_bytes); }); -} - -void aes_ecb_decrypt(Tensor input, Tensor output, uint8_t *key_bytes) { - block_cipher(input, output, - [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { aes::decrypt(block, key_bytes); }); -} - -void aes_ctr_encrypt(Tensor input, Tensor output, uint8_t *key_bytes) { + uint8_t round_key[aes::round_key_t_size]; + aes::KeyExpansion(round_key, key_bytes); block_cipher( - input, output, [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { - uint8_t idx_block[aes::block_t_size]; - std::memset(&idx_block, 0, aes::block_t_size); - *(reinterpret_cast(idx_block)) = idx; - aes::encrypt(idx_block, key_bytes); - for (size_t i = 0; i < aes::block_t_size; i++) { - block[i] ^= idx_block[i]; - } + input, output, [round_key] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { + owcf::matyas_meyer_oseas(block, round_key); }); } -void aes_ctr_decrypt(Tensor input, Tensor output, uint8_t *key_bytes) { - aes_ctr_encrypt(input, output, key_bytes); -} - Tensor encrypt(Tensor 
input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), "input, output and key tensors must have the same device"); @@ -446,33 +54,8 @@ Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &ciphe const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); if (mode == "ecb") { aes_ecb_encrypt(input, output, key_bytes); - } else if (mode == "ctr") { - aes_ctr_encrypt(input, output, key_bytes); - } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); - } - return output; -} - -Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { - TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), - "input, output and key tensors must have the same device"); - const auto output_size_bytes = output.numel() * output.itemsize(); - const auto input_size_bytes = input.numel() * input.itemsize(); - const auto diff = input_size_bytes - output_size_bytes; - TORCH_CHECK(0 <= diff && diff < aes::block_t_size, - "output tensor size in bytes must be less then or equal to input tensor size in bytes, the difference must be less " - "than block size"); - TORCH_CHECK(input_size_bytes % aes::block_t_size == 0, - "input tensor size in bytes must divisible by cipher block size in bytes"); - check_cipher(cipher, key); - const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); - if (mode == "ecb") { - aes_ecb_decrypt(input, output, key_bytes); - } else if (mode == "ctr") { - aes_ctr_decrypt(input, output, key_bytes); } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); + TORCH_CHECK(false, "only supports \"ecb\" mode, \"", mode, "\" is not supported."); } return output; } diff --git a/torchcsprng/kernels.cuh b/torchcsprng/kernels.cuh 
index 4467e11..1c0ae97 100644 --- a/torchcsprng/kernels.cuh +++ b/torchcsprng/kernels.cuh @@ -23,59 +23,10 @@ namespace cuda { * LICENSE file in the root directory of this source tree. */ -// ==================================================== Random ======================================================== - -at::Tensor &random_(at::Tensor &self, c10::optional generator); - -at::Tensor &random_from_to( - at::Tensor &self, int64_t from, optional to, c10::optional generator); - -at::Tensor &random_to(at::Tensor &self, int64_t to, c10::optional generator); - -// ==================================================== Uniform ======================================================= - -at::Tensor &uniform_(at::Tensor &self, double from, double to, c10::optional generator); - -// ==================================================== Normal ======================================================== - -at::Tensor &normal_(at::Tensor &self, double mean, double std, c10::optional generator); - -at::Tensor &normal_Tensor_float_out( - at::Tensor &output, const at::Tensor &mean, double std, c10::optional gen); - -at::Tensor &normal_float_Tensor_out( - at::Tensor &output, double mean, const at::Tensor &std, c10::optional gen); - -at::Tensor &normal_Tensor_Tensor_out( - at::Tensor &output, const at::Tensor &mean, const at::Tensor &std, c10::optional gen); - -at::Tensor normal_Tensor_float(const at::Tensor &mean, double std, c10::optional gen); - -at::Tensor normal_float_Tensor(double mean, const at::Tensor &std, c10::optional gen); - -at::Tensor normal_Tensor_Tensor(const at::Tensor &mean, const at::Tensor &std, c10::optional gen); - -// ==================================================== Cauchy ======================================================== - -at::Tensor &cauchy_(at::Tensor &self, double median, double sigma, c10::optional generator); - -// ================================================== LogNormal ======================================================= - -at::Tensor 
&log_normal_(at::Tensor &self, double mean, double std, c10::optional gen); - -// ================================================== Geometric ======================================================= - -at::Tensor &geometric_(at::Tensor &self, double p, c10::optional gen); - -// ================================================== Exponential ===================================================== - -at::Tensor &exponential_(at::Tensor &self, double lambda, c10::optional gen); - // ================================================Encrypt/Decrypt===================================================== -Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); - -Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); +at::Tensor encrypt( + at::Tensor input, at::Tensor output, at::Tensor key, const std::string &cipher, const std::string &mode); // The original kernels_body.inc ends here diff --git a/torchcsprng/kernels_commons.h b/torchcsprng/kernels_commons.cuh similarity index 98% rename from torchcsprng/kernels_commons.h rename to torchcsprng/kernels_commons.cuh index 359ba2b..a5714a3 100644 --- a/torchcsprng/kernels_commons.h +++ b/torchcsprng/kernels_commons.cuh @@ -13,8 +13,8 @@ #include #include #include -#include "macros.h" -#include "block_cipher.h" +#include "macros.cuh" +#include "block_cipher.cuh" inline uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) { return (static_cast(hi) << 32) | lo; diff --git a/torchcsprng/macros.h b/torchcsprng/macros.cuh similarity index 94% rename from torchcsprng/macros.h rename to torchcsprng/macros.cuh index d98d1bd..ccc64ed 100644 --- a/torchcsprng/macros.h +++ b/torchcsprng/macros.cuh @@ -11,6 +11,7 @@ #define TORCH_CSPRNG_HOST_DEVICE __host__ __device__ #define TORCH_CSPRNG_CONSTANT __constant__ #else +#error "CUDA not found" #define TORCH_CSPRNG_HOST_DEVICE #define TORCH_CSPRNG_CONSTANT #endif diff --git 
a/torchcsprng/owcf.cu b/torchcsprng/owcf.cu new file mode 100644 index 0000000..1c5ecda --- /dev/null +++ b/torchcsprng/owcf.cu @@ -0,0 +1,30 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +#include "owcf.cuh" +#include "aes.cuh" + +namespace owcf { + +TORCH_CSPRNG_HOST_DEVICE inline void memcpy_block(uint32_t *dest, const uint32_t *src) { + dest[0] = src[0]; + dest[1] = src[1]; + dest[2] = src[2]; + dest[3] = src[3]; +} + +TORCH_CSPRNG_HOST_DEVICE inline void xor_block(uint32_t *dest, const uint32_t *src) { + dest[0] ^= src[0]; + dest[1] ^= src[1]; + dest[2] ^= src[2]; + dest[3] ^= src[3]; +} + +TORCH_CSPRNG_HOST_DEVICE void matyas_meyer_oseas(uint8_t *state, const uint8_t *round_key) { + uint32_t input[aes::block_t_size / sizeof(uint32_t)]; + memcpy_block(input, reinterpret_cast(state)); + aes::encrypt_with_round_key(state, round_key); + xor_block(reinterpret_cast(state), input); +} + +} // namespace owcf diff --git a/torchcsprng/owcf.cuh b/torchcsprng/owcf.cuh new file mode 100644 index 0000000..8f88d96 --- /dev/null +++ b/torchcsprng/owcf.cuh @@ -0,0 +1,17 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +//! 
One-way compression functions + +#pragma once + +#include +#include +#include "macros.cuh" + +namespace owcf { + +/// Matyas–Meyer–Oseas based on AES128 +TORCH_CSPRNG_HOST_DEVICE void matyas_meyer_oseas(uint8_t *state, const uint8_t *round_key); + +} // namespace owcf From a2fd7600de2d15c0a7a8f1e68ddc40e8c1004cbd Mon Sep 17 00:00:00 2001 From: myl7 Date: Mon, 22 Jan 2024 10:10:51 +0800 Subject: [PATCH 06/10] Remove part of pytorch dep Update cmake config to exclude headers --- CMakeLists.txt | 9 ---- lib.cpp | 11 ++--- torchcsprng/block_cipher.cuh | 24 +++------- torchcsprng/kernels.cu | 46 +++++++++---------- torchcsprng/kernels.cuh | 6 +-- torchcsprng/kernels_commons.cuh | 78 --------------------------------- 6 files changed, 34 insertions(+), 140 deletions(-) delete mode 100644 torchcsprng/kernels_commons.cuh diff --git a/CMakeLists.txt b/CMakeLists.txt index 1899a4b..854c104 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,18 +26,9 @@ include_directories(${PYTHON_INCLUDE_DIRS}) add_library( fssprgcuda SHARED lib.cpp - lib.h torchcsprng/kernels.cu - torchcsprng/kernels.cuh - torchcsprng/macros.cuh - torchcsprng/kernels_commons.cuh - torchcsprng/block_cipher.cuh - torchcsprng/OffsetCalculator.cuh - torchcsprng/THCIntegerDivider.cuh torchcsprng/owcf.cu - torchcsprng/owcf.cuh torchcsprng/aes.cu - torchcsprng/aes.cuh ) set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) target_link_libraries(fssprgcuda "${TORCH_LIBRARIES}") diff --git a/lib.cpp b/lib.cpp index dfb406e..dfca417 100644 --- a/lib.cpp +++ b/lib.cpp @@ -6,21 +6,18 @@ using torch::Tensor; -extern Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); +extern Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher); constexpr size_t block_t_size = 16; -void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key) { +void csprng_matyas_meyer_oseas_aes128(uint8_t 
*buf, size_t buf_size, const uint8_t *key, size_t key_size) { const auto input_size_bytes = buf_size; TORCH_CHECK(input_size_bytes % block_t_size == 0, "input size in bytes(", input_size_bytes, ") is not a multiple of block size(", block_t_size, ")"); - Tensor input = torch::from_blob(buf, {input_size_bytes}, torch::kUInt8).to(torch::kCUDA); - - const auto output_size_bytes = input_size_bytes; - Tensor output = torch::empty({output_size_bytes}, torch::kUInt8); + Tensor input = torch::from_blob(buf, {static_cast(input_size_bytes)}, torch::kUInt8).to(torch::kCUDA); const auto key_size_bytes = 16; Tensor key_tensor = torch::from_blob(const_cast(key), {key_size_bytes}, torch::kUInt8).to(torch::kCUDA); - encrypt(input, output, key_tensor, "aes128", "ecb"); + const auto output = encrypt(input, key, key_size, "aes128"); } diff --git a/torchcsprng/block_cipher.cuh b/torchcsprng/block_cipher.cuh index d36bf94..5e0f27c 100644 --- a/torchcsprng/block_cipher.cuh +++ b/torchcsprng/block_cipher.cuh @@ -62,7 +62,8 @@ TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper(int64_t idx, cip void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { uint8_t block[block_size]; - std::memset(&block, 0, block_size); // is it ok to use zeros as padding? + // std::memset(&block, 0, block_size); // is it ok to use zeros as padding? + // No need to pad because we ensure `input_size_bytes % block_t_size == 0` previously in lib.cpp. // In this application, we require users to pass in the input that is a multiple of block_size. // So zero padding never actually happens and it is ok. 
if (input_ptr != nullptr) { @@ -118,7 +119,7 @@ void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, inp } template -void block_cipher(at::Tensor input, at::Tensor output, cipher_t cipher) { +void block_cipher(at::Tensor input, cipher_t cipher) { const auto input_ptr = input.data_ptr(); const auto input_numel = input.numel(); @@ -132,23 +133,10 @@ void block_cipher(at::Tensor input, at::Tensor output, cipher_t cipher) { const auto input_index_calc = [input_offset_calc] TORCH_CSPRNG_HOST_DEVICE( uint32_t li) -> uint32_t { return input_offset_calc.get(li)[0]; }; - const auto output_ptr = output.data_ptr(); - const auto output_numel = output.numel(); + const auto device = input.device(); - // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero - if (output_ptr == nullptr || output_numel == 0) { - return; - } - - const auto output_type_size = output.element_size(); - const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); - const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE( - uint32_t li) -> uint32_t { return output_offset_calc.get(li)[0]; }; - - const auto device = output.device(); - - torch::csprng::block_cipher(input_ptr, input_numel, input_type_size, input_index_calc, output_ptr, - output_numel, output_type_size, output_index_calc, device, cipher, block_size / output_type_size, + torch::csprng::block_cipher(input_ptr, input_numel, input_type_size, input_index_calc, input_ptr, + input_numel, input_type_size, input_index_calc, device, cipher, block_size / input_type_size, [] TORCH_CSPRNG_HOST_DEVICE(uint8_t * x) {}); } diff --git a/torchcsprng/kernels.cu b/torchcsprng/kernels.cu index f55153d..6f1567f 100644 --- a/torchcsprng/kernels.cu +++ b/torchcsprng/kernels.cu @@ -5,10 +5,14 @@ * LICENSE file in the root directory of this source tree. 
*/ -#include "kernels_commons.cuh" +#include "kernels.cuh" +#include +#include "block_cipher.cuh" #include "aes.cuh" #include "owcf.cuh" +using at::Tensor; + namespace torch { namespace csprng { namespace cuda { @@ -24,40 +28,32 @@ namespace cuda { // ================================================Encrypt/Decrypt===================================================== -void check_cipher(const std::string &cipher, Tensor key) { +void check_cipher(const std::string &cipher, size_t key_size) { if (cipher == "aes128") { - TORCH_CHECK(key.element_size() * key.numel() == 16, "key tensor must have 16 bytes(128 bits)"); + TORCH_CHECK(key_size == 16, "key tensor must have 16 bytes(128 bits)"); } else { TORCH_CHECK(false, "encrypt/decrypt supports \"aes128\" cipher, \"", cipher, "\" is not supported."); } } -void aes_ecb_encrypt(Tensor input, Tensor output, uint8_t *key_bytes) { +void aes_ecb_encrypt(Tensor input, const uint8_t *key_bytes) { uint8_t round_key[aes::round_key_t_size]; aes::KeyExpansion(round_key, key_bytes); - block_cipher( - input, output, [round_key] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { - owcf::matyas_meyer_oseas(block, round_key); - }); + block_cipher(input, [round_key] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { + owcf::matyas_meyer_oseas(block, round_key); + }); } -Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { - TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), - "input, output and key tensors must have the same device"); - const auto output_size_bytes = output.numel() * output.itemsize(); - const auto input_size_bytes = input.numel() * input.itemsize(); - const auto input_size_bytes_rounded = - (input_size_bytes + aes::block_t_size - 1) / aes::block_t_size * aes::block_t_size; - TORCH_CHECK(output_size_bytes == input_size_bytes_rounded, "output size in bytes(", output_size_bytes, - ") is not equal to input size in 
bytes rounded to block size(", input_size_bytes_rounded, ")"); - check_cipher(cipher, key); - const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); - if (mode == "ecb") { - aes_ecb_encrypt(input, output, key_bytes); - } else { - TORCH_CHECK(false, "only supports \"ecb\" mode, \"", mode, "\" is not supported."); - } - return output; +Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher) { + const auto input_size_bytes = buf.numel() * buf.itemsize(); + // const auto input_size_bytes_rounded = + // (input_size_bytes + aes::block_t_size - 1) / aes::block_t_size * aes::block_t_size; + // TORCH_CHECK(output_size_bytes == input_size_bytes_rounded, "output size in bytes(", output_size_bytes, + // ") is not equal to input size in bytes rounded to block size(", input_size_bytes_rounded, ")"); + // No need to check because we ensure `input_size_bytes % block_t_size == 0` previously in lib.cpp. + check_cipher(cipher, key_size); + aes_ecb_encrypt(buf, key); + return buf; } // The original kernels_body.inc ends here diff --git a/torchcsprng/kernels.cuh b/torchcsprng/kernels.cuh index 1c0ae97..6dab64f 100644 --- a/torchcsprng/kernels.cuh +++ b/torchcsprng/kernels.cuh @@ -7,9 +7,10 @@ #pragma once -#include #include +using at::Tensor; + namespace torch { namespace csprng { namespace cuda { @@ -25,8 +26,7 @@ namespace cuda { // ================================================Encrypt/Decrypt===================================================== -at::Tensor encrypt( - at::Tensor input, at::Tensor output, at::Tensor key, const std::string &cipher, const std::string &mode); +Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher); // The original kernels_body.inc ends here diff --git a/torchcsprng/kernels_commons.cuh b/torchcsprng/kernels_commons.cuh deleted file mode 100644 index a5714a3..0000000 --- a/torchcsprng/kernels_commons.cuh +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) Meta 
Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include "macros.cuh" -#include "block_cipher.cuh" - -inline uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) { - return (static_cast(hi) << 32) | lo; -} - -// CUDA CSPRNG is actually CPU generator which is used only to generate a random key on CPU for AES running in a block -// mode on CUDA -struct CSPRNGGeneratorImpl : public c10::GeneratorImpl { - CSPRNGGeneratorImpl(bool use_rd) - : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, - use_rd_{use_rd} {} - CSPRNGGeneratorImpl(const std::string &token) - : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, - use_rd_{true}, - rd_{token} {} - CSPRNGGeneratorImpl(uint64_t seed) - : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, - use_rd_{false}, - mt_{static_cast(seed)} {} - ~CSPRNGGeneratorImpl() = default; - uint32_t random() { - return use_rd_ ? rd_() : mt_(); - } - uint64_t random64() { - return use_rd_ ? 
make64BitsFrom32Bits(rd_(), rd_()) : make64BitsFrom32Bits(mt_(), mt_()); - } - - void set_current_seed(uint64_t seed) override { - throw std::runtime_error("not implemented"); - } - uint64_t current_seed() const override { - throw std::runtime_error("not implemented"); - } - uint64_t seed() override { - throw std::runtime_error("not implemented"); - } - CSPRNGGeneratorImpl *clone_impl() const override { - throw std::runtime_error("not implemented"); - } - - static at::DeviceType device_type() { - return at::DeviceType::CPU; - } - - void set_state(const c10::TensorImpl &new_state) override { - throw std::runtime_error("not implemented"); - } - c10::intrusive_ptr get_state() const override { - throw std::runtime_error("not implemented"); - } - - void set_offset(uint64_t offset) override { - throw std::runtime_error("not implemented"); - } - uint64_t get_offset() const override { - throw std::runtime_error("not implenented"); - } - bool use_rd_; - std::random_device rd_; - std::mt19937 mt_; -}; From 6089f1c0b4572705649cd99e76811490a7b9357c Mon Sep 17 00:00:00 2001 From: myl7 Date: Mon, 22 Jan 2024 10:49:41 +0800 Subject: [PATCH 07/10] Rearrange cpp cmake package --- CMakeLists.txt | 11 ++++++----- include/fssprgcuda.h | 12 ++++++++++++ lib.h | 8 -------- lib.cpp => src/fssprgcuda.cpp | 12 ++++++++---- .../torchcsprng}/OffsetCalculator.cuh | 0 .../torchcsprng}/THCIntegerDivider.cuh | 0 {torchcsprng => src/torchcsprng}/aes.cu | 0 {torchcsprng => src/torchcsprng}/aes.cuh | 0 {torchcsprng => src/torchcsprng}/block_cipher.cuh | 0 {torchcsprng => src/torchcsprng}/kernels.cu | 0 {torchcsprng => src/torchcsprng}/kernels.cuh | 0 {torchcsprng => src/torchcsprng}/macros.cuh | 0 {torchcsprng => src/torchcsprng}/owcf.cu | 0 {torchcsprng => src/torchcsprng}/owcf.cuh | 0 {torchcsprng => src/torchcsprng}/version.txt | 0 15 files changed, 26 insertions(+), 17 deletions(-) create mode 100644 include/fssprgcuda.h delete mode 100644 lib.h rename lib.cpp => src/fssprgcuda.cpp (73%) rename 
{torchcsprng => src/torchcsprng}/OffsetCalculator.cuh (100%) rename {torchcsprng => src/torchcsprng}/THCIntegerDivider.cuh (100%) rename {torchcsprng => src/torchcsprng}/aes.cu (100%) rename {torchcsprng => src/torchcsprng}/aes.cuh (100%) rename {torchcsprng => src/torchcsprng}/block_cipher.cuh (100%) rename {torchcsprng => src/torchcsprng}/kernels.cu (100%) rename {torchcsprng => src/torchcsprng}/kernels.cuh (100%) rename {torchcsprng => src/torchcsprng}/macros.cuh (100%) rename {torchcsprng => src/torchcsprng}/owcf.cu (100%) rename {torchcsprng => src/torchcsprng}/owcf.cuh (100%) rename {torchcsprng => src/torchcsprng}/version.txt (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 854c104..e29a6e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,11 +25,12 @@ include_directories(${PYTHON_INCLUDE_DIRS}) add_library( fssprgcuda SHARED - lib.cpp - torchcsprng/kernels.cu - torchcsprng/owcf.cu - torchcsprng/aes.cu + src/fssprgcuda.cpp + src/torchcsprng/kernels.cu + src/torchcsprng/owcf.cu + src/torchcsprng/aes.cu ) set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -target_link_libraries(fssprgcuda "${TORCH_LIBRARIES}") +target_include_directories(fssprgcuda PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") +target_link_libraries(fssprgcuda PRIVATE "${TORCH_LIBRARIES}") target_compile_options(fssprgcuda PRIVATE $<$: --expt-extended-lambda>) diff --git a/include/fssprgcuda.h b/include/fssprgcuda.h new file mode 100644 index 0000000..5907d1d --- /dev/null +++ b/include/fssprgcuda.h @@ -0,0 +1,12 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include + +namespace fssprgcuda { + +void matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key); + +} // namespace fssprgcuda diff --git a/lib.h b/lib.h deleted file mode 100644 index b809e76..0000000 --- a/lib.h +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright (C) myl7 -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - 
-void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key); diff --git a/lib.cpp b/src/fssprgcuda.cpp similarity index 73% rename from lib.cpp rename to src/fssprgcuda.cpp index dfca417..4836b3e 100644 --- a/lib.cpp +++ b/src/fssprgcuda.cpp @@ -1,16 +1,18 @@ // Copyright (C) myl7 // SPDX-License-Identifier: BSD-3-Clause -#include "lib.h" +#include "fssprgcuda.h" #include +#include "torchcsprng/kernels.cuh" using torch::Tensor; - -extern Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher); +using torch::csprng::cuda::encrypt; constexpr size_t block_t_size = 16; -void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { +namespace fssprgcuda { + +void matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { const auto input_size_bytes = buf_size; TORCH_CHECK(input_size_bytes % block_t_size == 0, "input size in bytes(", input_size_bytes, ") is not a multiple of block size(", block_t_size, ")"); @@ -21,3 +23,5 @@ void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8 const auto output = encrypt(input, key, key_size, "aes128"); } + +} // namespace fssprgcuda diff --git a/torchcsprng/OffsetCalculator.cuh b/src/torchcsprng/OffsetCalculator.cuh similarity index 100% rename from torchcsprng/OffsetCalculator.cuh rename to src/torchcsprng/OffsetCalculator.cuh diff --git a/torchcsprng/THCIntegerDivider.cuh b/src/torchcsprng/THCIntegerDivider.cuh similarity index 100% rename from torchcsprng/THCIntegerDivider.cuh rename to src/torchcsprng/THCIntegerDivider.cuh diff --git a/torchcsprng/aes.cu b/src/torchcsprng/aes.cu similarity index 100% rename from torchcsprng/aes.cu rename to src/torchcsprng/aes.cu diff --git a/torchcsprng/aes.cuh b/src/torchcsprng/aes.cuh similarity index 100% rename from torchcsprng/aes.cuh rename to src/torchcsprng/aes.cuh diff --git a/torchcsprng/block_cipher.cuh 
b/src/torchcsprng/block_cipher.cuh similarity index 100% rename from torchcsprng/block_cipher.cuh rename to src/torchcsprng/block_cipher.cuh diff --git a/torchcsprng/kernels.cu b/src/torchcsprng/kernels.cu similarity index 100% rename from torchcsprng/kernels.cu rename to src/torchcsprng/kernels.cu diff --git a/torchcsprng/kernels.cuh b/src/torchcsprng/kernels.cuh similarity index 100% rename from torchcsprng/kernels.cuh rename to src/torchcsprng/kernels.cuh diff --git a/torchcsprng/macros.cuh b/src/torchcsprng/macros.cuh similarity index 100% rename from torchcsprng/macros.cuh rename to src/torchcsprng/macros.cuh diff --git a/torchcsprng/owcf.cu b/src/torchcsprng/owcf.cu similarity index 100% rename from torchcsprng/owcf.cu rename to src/torchcsprng/owcf.cu diff --git a/torchcsprng/owcf.cuh b/src/torchcsprng/owcf.cuh similarity index 100% rename from torchcsprng/owcf.cuh rename to src/torchcsprng/owcf.cuh diff --git a/torchcsprng/version.txt b/src/torchcsprng/version.txt similarity index 100% rename from torchcsprng/version.txt rename to src/torchcsprng/version.txt From b666ab1e0faadbcc534bb9508342a0174aa25215 Mon Sep 17 00:00:00 2001 From: myl7 Date: Fri, 12 Apr 2024 12:13:07 +0800 Subject: [PATCH 08/10] Drop dep torch --- CMakeLists.txt | 24 ++---- src/fssprgcuda.cpp | 14 +--- src/torchcsprng/OffsetCalculator.cuh | 108 -------------------------- src/torchcsprng/block_cipher.cuh | 110 +++++++++------------------ src/torchcsprng/kernels.cu | 30 +++----- src/torchcsprng/kernels.cuh | 8 +- 6 files changed, 57 insertions(+), 237 deletions(-) delete mode 100644 src/torchcsprng/OffsetCalculator.cuh diff --git a/CMakeLists.txt b/CMakeLists.txt index e29a6e4..02b5ace 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,30 +1,17 @@ -# Copyright (C) myl7 -# SPDX-License-Identifier: BSD-3-Clause - +# For Ubuntu Jammy 22.04 so far cmake_minimum_required(VERSION 3.22) # Set env `CUDACXX=/absolute/path/to/nvcc` to enable the CUDA language # if nvcc is not in the PATH. 
project(fss-prg-cuda LANGUAGES CUDA CXX) set(CMAKE_CXX_STANDARD 17) +# If the builder is confused by a soft link from /usr/local/cuda-* to /usr/local/cuda, +# pass `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-*` to CMake to locate CUDA. include(CheckLanguage) check_language(CUDA) -# Pass `-DCMAKE_PREFIX_PATH=/absolute/path/to/libtorch` to cmake to locate LibTorch. -# If the package cmake config is confused with a soft link from /usr/local/cuda-* to /usr/local/cuda, -# pass `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-*` to cmake to locate CUDA. -# Pass `-DGPU_ARCHS=` to cmake to specify the CPU archtecture. -# You can check the value alternatively at https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ . -# This value set by LibTorch (version that supports CUDA 12.1) has arch that is not supported by CUDA 12.1 instead. -find_package(Torch REQUIRED) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") - -# Fix Python.h not found even though it is just located in /usr/include/python* -find_package(PythonLibs REQUIRED) -include_directories(${PYTHON_INCLUDE_DIRS}) - add_library( - fssprgcuda SHARED + fssprgcuda src/fssprgcuda.cpp src/torchcsprng/kernels.cu src/torchcsprng/owcf.cu @@ -32,5 +19,4 @@ add_library( ) set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) target_include_directories(fssprgcuda PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") -target_link_libraries(fssprgcuda PRIVATE "${TORCH_LIBRARIES}") -target_compile_options(fssprgcuda PRIVATE $<$: --expt-extended-lambda>) +target_compile_options(fssprgcuda PRIVATE $<$:--extended-lambda>) diff --git a/src/fssprgcuda.cpp b/src/fssprgcuda.cpp index 4836b3e..192c8fd 100644 --- a/src/fssprgcuda.cpp +++ b/src/fssprgcuda.cpp @@ -2,26 +2,16 @@ // SPDX-License-Identifier: BSD-3-Clause #include "fssprgcuda.h" -#include #include "torchcsprng/kernels.cuh" -using torch::Tensor; using torch::csprng::cuda::encrypt; constexpr size_t block_t_size = 16; namespace 
fssprgcuda { -void matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { - const auto input_size_bytes = buf_size; - TORCH_CHECK(input_size_bytes % block_t_size == 0, "input size in bytes(", input_size_bytes, - ") is not a multiple of block size(", block_t_size, ")"); - Tensor input = torch::from_blob(buf, {static_cast(input_size_bytes)}, torch::kUInt8).to(torch::kCUDA); - - const auto key_size_bytes = 16; - Tensor key_tensor = torch::from_blob(const_cast(key), {key_size_bytes}, torch::kUInt8).to(torch::kCUDA); - - const auto output = encrypt(input, key, key_size, "aes128"); +int matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { + return encrypt(buf, buf_size, key, key_size, "aes128"); } } // namespace fssprgcuda diff --git a/src/torchcsprng/OffsetCalculator.cuh b/src/torchcsprng/OffsetCalculator.cuh deleted file mode 100644 index 15fcd0e..0000000 --- a/src/torchcsprng/OffsetCalculator.cuh +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include "THCIntegerDivider.cuh" - -/// OffsetCalculator calculates the offset in bytes of a linear index for NARGS -/// operands that share the same shape, but may have different strides. - -#ifdef __HIP_PLATFORM_HCC__ -constexpr int MAX_DIMS = 16; -#else -constexpr int MAX_DIMS = 25; -#endif - -template -struct OffsetCalculator { - // The offset for each argument. Wrapper around fixed-size array. - // On CUDA, zero sized array is not allowed, so when we are handling nullary - // operators, we need to create a size 1 offset to avoid compiler failure. - // This size 1 offset is just a placeholder, and we will not use it. 
- using offset_type = at::detail::Array(NARGS, 1)>; - - // if element_sizes is nullptr, then the strides will be in bytes, otherwise - // the strides will be in # of elements. - OffsetCalculator( - int dims, const int64_t *sizes, const int64_t *const *strides, const int64_t *element_sizes = nullptr) - : dims(dims) { - TORCH_CHECK(dims <= MAX_DIMS, "tensor has too many (>", MAX_DIMS, ") dims"); - for (int i = 0; i < MAX_DIMS; ++i) { - if (i < dims) { - sizes_[i] = IntDivider(sizes[i]); - } else { - sizes_[i] = IntDivider(1); - } - for (int arg = 0; arg < NARGS; arg++) { - int64_t element_size = (element_sizes == nullptr ? 1LL : element_sizes[arg]); - strides_[i][arg] = i < dims ? strides[arg][i] / element_size : 0; - } - } - } - - C10_HOST_DEVICE offset_type get(index_t linear_idx) const { - offset_type offsets; -#pragma unroll - for (int arg = 0; arg < NARGS; arg++) { - offsets[arg] = 0; - } - -#pragma unroll - for (int dim = 0; dim < MAX_DIMS; ++dim) { - if (dim == dims) { - break; - } - auto divmod = sizes_[dim].divmod(linear_idx); - linear_idx = divmod.div; - -#pragma unroll - for (int arg = 0; arg < NARGS; arg++) { - offsets[arg] += divmod.mod * strides_[dim][arg]; - } - } - return offsets; - } - - int dims; - IntDivider sizes_[MAX_DIMS]; - index_t strides_[MAX_DIMS][std::max(NARGS, 1)]; -}; - -template -struct TrivialOffsetCalculator { - // The offset for each argument. Wrapper around fixed-size array. - // The offsets are in # of elements, not in bytes. - // On CUDA, zero sized array is not allowed, so when we are handling nullary - // operators, we need to create a size 1 offset to avoid compiler failure. - // This size 1 offset is just a placeholder, and we will not use it. 
- using offset_type = at::detail::Array(NARGS, 1)>; - - C10_HOST_DEVICE offset_type get(index_t linear_idx) const { - offset_type offsets; -#pragma unroll - for (int arg = 0; arg < NARGS; arg++) { - offsets[arg] = linear_idx; - } - return offsets; - } -}; - -template -static OffsetCalculator make_offset_calculator(const at::TensorIterator &iter) { - AT_ASSERT(N <= iter.ntensors()); - std::array strides; - for (int i = 0; i < N; i++) { - strides[i] = iter.strides(i).data(); - } - return OffsetCalculator(iter.ndim(), iter.shape().data(), strides.data()); -} diff --git a/src/torchcsprng/block_cipher.cuh b/src/torchcsprng/block_cipher.cuh index 5e0f27c..8c922e3 100644 --- a/src/torchcsprng/block_cipher.cuh +++ b/src/torchcsprng/block_cipher.cuh @@ -8,136 +8,98 @@ #pragma once #include "macros.cuh" -#include -#include -#include "OffsetCalculator.cuh" -#include #include -#include - -#if defined(__CUDACC__) || defined(__HIPCC__) -#include -#include -#else -#error "CUDA not found" -#endif - -#if defined(__CUDACC__) || defined(__HIPCC__) -#define UNROLL_IF_CUDA #pragma unroll -#else -#error "CUDA not found" -#define UNROLL_IF_CUDA -#endif +#include +#include namespace torch { namespace csprng { -template -TORCH_CSPRNG_HOST_DEVICE static void copy_input_to_block(int64_t idx, uint8_t *block, int block_size, void *input_ptr, - int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc) { +TORCH_CSPRNG_HOST_DEVICE static void copy_input_to_block( + int64_t idx, uint8_t *block, int block_size, void *input_ptr, int64_t input_numel, int input_type_size) { for (auto i = 0; i < block_size / input_type_size; ++i) { const auto linear_index = idx * (block_size / input_type_size) + i; if (linear_index < input_numel) { - std::memcpy(block + i * input_type_size, - &(reinterpret_cast(input_ptr)[input_index_calc(linear_index)]), input_type_size); + std::memcpy( + block + i * input_type_size, &(reinterpret_cast(input_ptr)[linear_index]), input_type_size); } } } -template 
TORCH_CSPRNG_HOST_DEVICE static void copy_block_to_output(int64_t idx, uint8_t *block, int output_elem_per_block, - void *output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc) { + void *output_ptr, int64_t output_numel, int output_type_size) { for (auto i = 0; i < output_elem_per_block; ++i) { const auto linear_index = idx * output_elem_per_block + i; if (linear_index < output_numel) { - std::memcpy(&(reinterpret_cast(output_ptr)[output_index_calc(linear_index)]), - block + i * output_type_size, output_type_size); + std::memcpy( + &(reinterpret_cast(output_ptr)[linear_index]), block + i * output_type_size, output_type_size); } } } -template +template TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper(int64_t idx, cipher_t cipher, int output_elem_per_block, - void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, - int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { + void *input_ptr, int64_t input_numel, int input_type_size, void *output_ptr, int64_t output_numel, + int output_type_size, transform_t transform) { uint8_t block[block_size]; // std::memset(&block, 0, block_size); // is it ok to use zeros as padding? // No need to pad because we ensure `input_size_bytes % block_t_size == 0` previously in lib.cpp. // In this application, we require users to pass in the input that is a multiple of block_size. // So zero padding never actually happens and it is ok. 
if (input_ptr != nullptr) { - copy_input_to_block(idx, block, block_size, input_ptr, input_numel, input_type_size, input_index_calc); + copy_input_to_block(idx, block, block_size, input_ptr, input_numel, input_type_size); } cipher(idx, block); transform(block); - copy_block_to_output( - idx, block, output_elem_per_block, output_ptr, output_numel, output_type_size, output_index_calc); + copy_block_to_output(idx, block, output_elem_per_block, output_ptr, output_numel, output_type_size); } #if defined(__CUDACC__) || defined(__HIPCC__) -template +template __global__ static void block_cipher_kernel_cuda(cipher_t cipher, int output_elem_per_block, void *input_ptr, - int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, int64_t output_numel, - int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { + int64_t input_numel, int input_type_size, void *output_ptr, int64_t output_numel, int output_type_size, + transform_t transform) { const auto idx = blockIdx.x * blockDim.x + threadIdx.x; block_cipher_kernel_helper(idx, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, - input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform); + output_ptr, output_numel, output_type_size, transform); } #else #error "CUDA not found" #endif -template -void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void *output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, - at::Device device, cipher_t cipher, int output_elem_per_block, transform_t transform_func) { +template +int block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, void *output_ptr, int64_t output_numel, + int output_type_size, cipher_t cipher, int output_elem_per_block, transform_t transform_func) { if (output_ptr == nullptr || output_numel == 0) { - return; + return -1; } - if 
(device.type() == at::kCPU) { - TORCH_CHECK(false, "torchcsprng was compiled with only CUDA support"); - } else if (device.type() == at::kCUDA) { #if defined(__CUDACC__) || defined(__HIPCC__) - const auto threads = 256; - const auto grid = (output_numel + (threads * output_elem_per_block) - 1) / (threads * output_elem_per_block); - auto stream = at::cuda::getCurrentCUDAStream(); - block_cipher_kernel_cuda<<>>(cipher, output_elem_per_block, input_ptr, - input_numel, input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, - transform_func); - AT_CUDA_CHECK(cudaGetLastError()); + const auto threads = 256; + const auto grid = (output_numel + (threads * output_elem_per_block) - 1) / (threads * output_elem_per_block); + block_cipher_kernel_cuda<<>>(cipher, output_elem_per_block, input_ptr, input_numel, + input_type_size, output_ptr, output_numel, output_type_size, transform_func); + return cudaGetLastError(); #else #error "CUDA not found" - TORCH_CHECK(false, "torchcsprng was compiled without CUDA support"); #endif - } else { - TORCH_CHECK(false, "block_cipher supports only CPU and CUDA devices"); - } } template -void block_cipher(at::Tensor input, cipher_t cipher) { - const auto input_ptr = input.data_ptr(); - const auto input_numel = input.numel(); +int block_cipher(uint8_t *buf, size_t buf_size, cipher_t cipher) { + // We have ensured `buf_size % 16 == 0` in front + const auto input_ptr = reinterpret_cast(buf); + const auto input_numel = buf_size / 4; - // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero + // Otherwise IntDivider crashes with integer division by zero if (input_ptr == nullptr || input_numel == 0) { - return; + return -1; } - const auto input_type_size = input.element_size(); - const auto input_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(input)); - const auto input_index_calc = [input_offset_calc] TORCH_CSPRNG_HOST_DEVICE( - uint32_t li) -> uint32_t { return 
input_offset_calc.get(li)[0]; }; - - const auto device = input.device(); + const auto input_type_size = 4; - torch::csprng::block_cipher(input_ptr, input_numel, input_type_size, input_index_calc, input_ptr, - input_numel, input_type_size, input_index_calc, device, cipher, block_size / input_type_size, - [] TORCH_CSPRNG_HOST_DEVICE(uint8_t * x) {}); + return block_cipher(input_ptr, input_numel, input_type_size, input_ptr, input_numel, input_type_size, + cipher, block_size / input_type_size, [] TORCH_CSPRNG_HOST_DEVICE(uint8_t * x) {}); } } // namespace csprng diff --git a/src/torchcsprng/kernels.cu b/src/torchcsprng/kernels.cu index 6f1567f..704b5a3 100644 --- a/src/torchcsprng/kernels.cu +++ b/src/torchcsprng/kernels.cu @@ -5,14 +5,12 @@ * LICENSE file in the root directory of this source tree. */ +#include #include "kernels.cuh" -#include #include "block_cipher.cuh" #include "aes.cuh" #include "owcf.cuh" -using at::Tensor; - namespace torch { namespace csprng { namespace cuda { @@ -30,34 +28,26 @@ namespace cuda { void check_cipher(const std::string &cipher, size_t key_size) { if (cipher == "aes128") { - TORCH_CHECK(key_size == 16, "key tensor must have 16 bytes(128 bits)"); + assert((void("key tensor must have 16 bytes(128 bits)"), key_size == 16)); } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"aes128\" cipher, \"", cipher, "\" is not supported."); + assert((void("encrypt/decrypt only supports 'aes128' cipher"), false)); } } -void aes_ecb_encrypt(Tensor input, const uint8_t *key_bytes) { +int aes_ecb_encrypt(uint8_t *buf, size_t buf_size, const uint8_t *key_bytes) { uint8_t round_key[aes::round_key_t_size]; aes::KeyExpansion(round_key, key_bytes); - block_cipher(input, [round_key] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { - owcf::matyas_meyer_oseas(block, round_key); - }); + return block_cipher( + buf, buf_size, [round_key] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { + owcf::matyas_meyer_oseas(block, 
round_key); + }); } -Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher) { - const auto input_size_bytes = buf.numel() * buf.itemsize(); - // const auto input_size_bytes_rounded = - // (input_size_bytes + aes::block_t_size - 1) / aes::block_t_size * aes::block_t_size; - // TORCH_CHECK(output_size_bytes == input_size_bytes_rounded, "output size in bytes(", output_size_bytes, - // ") is not equal to input size in bytes rounded to block size(", input_size_bytes_rounded, ")"); - // No need to check because we ensure `input_size_bytes % block_t_size == 0` previously in lib.cpp. +int encrypt(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size, const std::string &cipher) { check_cipher(cipher, key_size); - aes_ecb_encrypt(buf, key); - return buf; + return aes_ecb_encrypt(buf, buf_size, key); } -// The original kernels_body.inc ends here - } // namespace cuda } // namespace csprng } // namespace torch diff --git a/src/torchcsprng/kernels.cuh b/src/torchcsprng/kernels.cuh index 6dab64f..d04bb01 100644 --- a/src/torchcsprng/kernels.cuh +++ b/src/torchcsprng/kernels.cuh @@ -7,9 +7,9 @@ #pragma once -#include - -using at::Tensor; +#include +#include +#include namespace torch { namespace csprng { @@ -26,7 +26,7 @@ namespace cuda { // ================================================Encrypt/Decrypt===================================================== -Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher); +int encrypt(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size, const std::string &cipher); // The original kernels_body.inc ends here From edd809db9d486e2db77fdc37064e2df5f6a1eed6 Mon Sep 17 00:00:00 2001 From: myl7 Date: Fri, 12 Apr 2024 12:37:44 +0800 Subject: [PATCH 09/10] Update exported names --- CMakeLists.txt | 2 +- include/fssprgcuda.h | 3 ++- src/fssprgcuda.cpp | 10 ++++------ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 02b5ace..e7e828f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,6 @@ cmake_minimum_required(VERSION 3.22) # Set env `CUDACXX=/absolute/path/to/nvcc` to enable the CUDA language # if nvcc is not in the PATH. project(fss-prg-cuda LANGUAGES CUDA CXX) -set(CMAKE_CXX_STANDARD 17) # If the builder is confused by a soft link from /usr/local/cuda-* to /usr/local/cuda, # pass `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-*` to CMake to locate CUDA. @@ -17,6 +16,7 @@ add_library( src/torchcsprng/owcf.cu src/torchcsprng/aes.cu ) +target_compile_features(fssprgcuda PUBLIC cxx_std_17) set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) target_include_directories(fssprgcuda PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") target_compile_options(fssprgcuda PRIVATE $<$:--extended-lambda>) diff --git a/include/fssprgcuda.h b/include/fssprgcuda.h index 5907d1d..2a6ed56 100644 --- a/include/fssprgcuda.h +++ b/include/fssprgcuda.h @@ -3,10 +3,11 @@ #pragma once +#include #include namespace fssprgcuda { -void matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key); +int Aes128MatyasMeyerOseas(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size); } // namespace fssprgcuda diff --git a/src/fssprgcuda.cpp b/src/fssprgcuda.cpp index 192c8fd..dd4a0b8 100644 --- a/src/fssprgcuda.cpp +++ b/src/fssprgcuda.cpp @@ -1,16 +1,14 @@ // Copyright (C) myl7 // SPDX-License-Identifier: BSD-3-Clause -#include "fssprgcuda.h" +#include #include "torchcsprng/kernels.cuh" -using torch::csprng::cuda::encrypt; - -constexpr size_t block_t_size = 16; - namespace fssprgcuda { -int matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { +using torch::csprng::cuda::encrypt; + +int Aes128MatyasMeyerOseas(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { return encrypt(buf, buf_size, key, key_size, "aes128"); } From 10350c046e060e6f976891804ae635112ab9a1f0 Mon Sep 17 
00:00:00 2001 From: myl7 Date: Fri, 12 Apr 2024 20:12:01 +0800 Subject: [PATCH 10/10] Link C++ and CUDA with Rust Going to fix the test --- .gitignore | 8 +++- CMakeLists.txt | 2 +- Cargo.lock | 93 ++++++++++++++++++++++++++++++++++++++ Cargo.toml | 15 ++++++ build.rs | 10 ++++ src/lib.rs | 62 +++++++++++++++++++++++++ src/torchcsprng/kernels.cu | 4 +- 7 files changed, 190 insertions(+), 4 deletions(-) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 build.rs create mode 100644 src/lib.rs diff --git a/.gitignore b/.gitignore index f683121..4f18e5e 100644 --- a/.gitignore +++ b/.gitignore @@ -35,5 +35,9 @@ .idea .vscode -# Build -/build +# Build dir +build +cmake-build-* + +# cargo +/target diff --git a/CMakeLists.txt b/CMakeLists.txt index e7e828f..8f01b9f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,6 @@ add_library( src/torchcsprng/aes.cu ) target_compile_features(fssprgcuda PUBLIC cxx_std_17) -set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON) target_include_directories(fssprgcuda PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") target_compile_options(fssprgcuda PRIVATE $<$:--extended-lambda>) diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..a078128 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,93 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + +[[package]] +name = "cpufeatures" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "fss-prg-cuda" +version = "0.1.0" +dependencies = [ + "aes", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "generic-array", +] + +[[package]] +name = "libc" +version = "0.2.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = 
"typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..959074e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "fss-prg-cuda" +version = "0.1.0" +edition = "2021" +authors = ["myl7 "] +# description = "" +license = "BSD-3-Clause" +homepage = "https://github.com/myl7/fss-prg-cuda" +# documentation = "https://docs.rs/fss-prg-cuda" +repository = "https://github.com/myl7/fss-prg-cuda.git" +keywords = ["crypto", "fss", "prg", "cuda"] +categories = ["cryptography"] + +[dev-dependencies] +aes = "0.8.4" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..eb5f1b7 --- /dev/null +++ b/build.rs @@ -0,0 +1,10 @@ +fn main() { + println!("cargo:rustc-link-search={}", "build"); + println!("cargo:rustc-link-search={}", "/usr/local/cuda/lib64"); + + println!("cargo:rustc-link-lib=static={}", "fssprgcuda"); + println!("cargo:rerun-if-changed={}", "build/libfssprgcuda.a"); + + println!("cargo:rustc-link-lib=dylib={}", "stdc++"); + println!("cargo:rustc-link-lib=dylib={}", "cudart"); +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..6571ca8 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,62 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +pub mod ffi { + use std::ffi::c_int; + + extern "C" { + #[link_name = "_ZN10fssprgcuda22Aes128MatyasMeyerOseasEPhmPKhm"] + pub fn aes128_matyas_meyer_oseas( + buf: *mut u8, + buf_size: usize, + key: *const u8, + key_size: usize, + ) -> c_int; + } +} + +pub fn aes128_matyas_meyer_oseas(buf: &mut [u8], key: &[u8]) -> i32 { + unsafe { + ffi::aes128_matyas_meyer_oseas(buf.as_mut_ptr(), 
buf.len(), key.as_ptr(), key.len()) as i32 + } +} + +#[cfg(test)] +mod tests { + use aes::cipher::generic_array::GenericArray; + use aes::cipher::{BlockEncrypt, KeyInit}; + use aes::Aes128; + + use super::*; + + fn xor_inplace(lhs: &mut [u8], rhs: &[u8]) { + lhs.iter_mut().zip(rhs.iter()).for_each(|(lb, rb)| { + *lb ^= rb; + }); + } + + fn aes128_matyas_meyer_oseas_alt(buf: &mut [u8], key: &[u8]) { + assert_eq!(buf.len(), key.len()); + assert_eq!(buf.len() % 16, 0); + (0..buf.len() / 16).for_each(|i| { + let key_block = GenericArray::from_slice(&key[i * 16..(i + 1) * 16]); + let cipher = Aes128::new(key_block); + let in_block = GenericArray::from_slice(&mut buf[i * 16..(i + 1) * 16]); + let mut out_block = GenericArray::default(); + cipher.encrypt_block_b2b(in_block, &mut out_block); + xor_inplace(&mut buf[i * 16..(i + 1) * 16], &out_block); + }); + } + + const BUF: &[u8] = b"g\xf1U\xf4\xc3-k\x8b\xb8\xcdA\x0c\xebQE\x97@\xb5\xf9\xca\x9278\xca\xb9\x82\xc1\xa1IR\x1d$\x92\x7fE\x18\xbd\t<(\xa5\x99[\x84\x95\x07L\x06'`\x0cU\xde\xb3\x0e\xa3\xfd`|\x96\xf5?\xe9\x04"; + const KEY: &[u8] = b"\xf0>\xc0\x8c\x1d|8m\x13oOm\xd4\xd46\x13\xfdk\x99\xa6\x10\xe8yj\xf1\x96\xc4\x9b\xc2jZ\xbf\xe8\xb1\x8ab\xe9n\x02\x07\xc6\xb6\xd7M\xc3[5\x13\xa5`\xef?\xc8| \xff\x16\xc0\xeaO&\xc5n\x9a"; + + #[test] + fn test_aes128_matyas_meyer_oseas() { + let mut buf = BUF.to_owned(); + aes128_matyas_meyer_oseas(&mut buf, KEY); + let mut buf_alt = BUF.to_owned(); + aes128_matyas_meyer_oseas_alt(&mut buf_alt, KEY); + assert_eq!(buf, buf_alt); + } +} diff --git a/src/torchcsprng/kernels.cu b/src/torchcsprng/kernels.cu index 704b5a3..c3ce3fd 100644 --- a/src/torchcsprng/kernels.cu +++ b/src/torchcsprng/kernels.cu @@ -28,7 +28,8 @@ namespace cuda { void check_cipher(const std::string &cipher, size_t key_size) { if (cipher == "aes128") { - assert((void("key tensor must have 16 bytes(128 bits)"), key_size == 16)); + // TODO: Different check + // assert((void("key tensor must have 16 bytes(128 bits)"), key_size == 
16)); } else { assert((void("encrypt/decrypt only supports 'aes128' cipher"), false)); } @@ -44,6 +45,7 @@ int aes_ecb_encrypt(uint8_t *buf, size_t buf_size, const uint8_t *key_bytes) { } int encrypt(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size, const std::string &cipher) { + // TODO: More checks check_cipher(cipher, key_size); return aes_ecb_encrypt(buf, buf_size, key); }