From c2a9c717722257f262d0ce0f5463eb8973d55145 Mon Sep 17 00:00:00 2001 From: myl7 Date: Mon, 11 Dec 2023 14:23:20 +0800 Subject: [PATCH 01/10] Remove unused files and format files --- .circleci/config.yml | 2458 ----------------- .../unittest/linux/scripts/environment.yml | 15 - .circleci/unittest/linux/scripts/install.sh | 29 - .../unittest/linux/scripts/post_process.sh | 8 - .circleci/unittest/linux/scripts/run_test.sh | 9 - .circleci/unittest/linux/scripts/setup_env.sh | 39 - .../unittest/windows/scripts/environment.yml | 15 - .circleci/unittest/windows/scripts/install.sh | 31 - .../windows/scripts/install_conda.bat | 1 - .../unittest/windows/scripts/post_process.sh | 8 - .../unittest/windows/scripts/run_test.sh | 9 - .../unittest/windows/scripts/setup_env.sh | 39 - .../windows/scripts/vc_env_helper.bat | 39 - .github/csprng_architecture.png | Bin 92773 -> 0 bytes .gitignore | 9 +- CODE_OF_CONDUCT.md | 76 - CONTRIBUTING.md | 31 - README.md | 105 +- examples/csprng.ipynb | 226 -- examples/encrypt_decrypt.ipynb | 307 -- packaging/README.md | 90 - packaging/build_conda.sh | 14 - packaging/build_wheel.sh | 54 - packaging/conda/build_csprng.sh | 229 -- packaging/conda/install_conda.bat | 1 - packaging/conda/switch_cuda_version.sh | 28 - packaging/pkg_helpers.bash | 382 --- packaging/torchcsprng/bld.bat | 27 - packaging/torchcsprng/conda_build_config.yaml | 26 - packaging/torchcsprng/meta.yaml | 56 - packaging/vs2017/activate.bat | 44 - packaging/vs2017/conda_build_config.yaml | 24 - packaging/vs2017/install_activate.bat | 30 - packaging/vs2017/install_runtime.bat | 49 - packaging/vs2017/meta.yaml | 24 - packaging/vs2019/activate.bat | 44 - packaging/vs2019/conda_build_config.yaml | 24 - packaging/vs2019/install_activate.bat | 30 - packaging/vs2019/install_runtime.bat | 49 - packaging/vs2019/meta.yaml | 24 - packaging/wheel/linux_manywheel.sh | 62 - packaging/wheel/osx_wheel.sh | 52 - packaging/wheel/relocate.py | 408 --- packaging/windows/azure-pipelines-ci.yml | 11 
- packaging/windows/azure-pipelines.yml | 35 - packaging/windows/build_csprng.bat | 145 - packaging/windows/cpu.bat | 37 - packaging/windows/cuda101.bat | 59 - packaging/windows/cuda102.bat | 59 - packaging/windows/cuda92.bat | 59 - packaging/windows/internal/auth.bat | 46 - packaging/windows/internal/build_conda.bat | 15 - packaging/windows/internal/build_wheels.bat | 12 - packaging/windows/internal/check_deps.bat | 67 - packaging/windows/internal/check_opts.bat | 33 - packaging/windows/internal/clean.bat | 5 - packaging/windows/internal/clone.bat | 56 - packaging/windows/internal/copy.bat | 13 - packaging/windows/internal/copy_cpu.bat | 1 - packaging/windows/internal/cuda_install.bat | 201 -- packaging/windows/internal/dep_install.bat | 14 - packaging/windows/internal/env_fix.bat | 31 - .../windows/internal/nightly_defaults.bat | 200 -- packaging/windows/internal/publish.bat | 89 - packaging/windows/internal/setup.bat | 44 - packaging/windows/internal/test.bat | 79 - packaging/windows/internal/upload.bat | 96 - packaging/windows/internal/vc_env_helper.bat | 43 - .../windows/internal/vc_install_helper.sh | 16 - packaging/windows/internal/vs2017_install.ps1 | 25 - packaging/windows/internal/vs2019_install.ps1 | 21 - packaging/windows/internal/vs_install.bat | 14 - packaging/windows/old/cuda100.bat | 59 - packaging/windows/old/cuda90.bat | 59 - packaging/windows/templates/auth_task.yml | 17 - packaging/windows/templates/build_conda.yml | 15 - packaging/windows/templates/build_task.yml | 173 -- packaging/windows/templates/build_wheels.yml | 9 - .../windows/templates/linux_build_task.yml | 38 - .../templates/override_pytorch_version.yml | 6 - .../windows/templates/publish_packages.yml | 8 - .../templates/publish_test_results.yml | 6 - .../templates/setup_env_for_msagent.yml | 25 - .../templates/setup_nightly_variables.yml | 11 - .../windows/templates/upload_to_conda.yml | 10 - packaging/windows/templates/upload_to_s3.yml | 15 - 
packaging/windows/templates/vsts_auth.yml | 8 - test/__init__.py | 4 - test/test_csprng.py | 654 ----- torchcsprng/__init__.py | 14 - torchcsprng/__init__.pyi | 14 - 91 files changed, 61 insertions(+), 7835 deletions(-) delete mode 100644 .circleci/config.yml delete mode 100644 .circleci/unittest/linux/scripts/environment.yml delete mode 100755 .circleci/unittest/linux/scripts/install.sh delete mode 100755 .circleci/unittest/linux/scripts/post_process.sh delete mode 100755 .circleci/unittest/linux/scripts/run_test.sh delete mode 100755 .circleci/unittest/linux/scripts/setup_env.sh delete mode 100644 .circleci/unittest/windows/scripts/environment.yml delete mode 100644 .circleci/unittest/windows/scripts/install.sh delete mode 100644 .circleci/unittest/windows/scripts/install_conda.bat delete mode 100644 .circleci/unittest/windows/scripts/post_process.sh delete mode 100644 .circleci/unittest/windows/scripts/run_test.sh delete mode 100644 .circleci/unittest/windows/scripts/setup_env.sh delete mode 100644 .circleci/unittest/windows/scripts/vc_env_helper.bat delete mode 100644 .github/csprng_architecture.png delete mode 100644 CODE_OF_CONDUCT.md delete mode 100644 CONTRIBUTING.md delete mode 100644 examples/csprng.ipynb delete mode 100644 examples/encrypt_decrypt.ipynb delete mode 100644 packaging/README.md delete mode 100755 packaging/build_conda.sh delete mode 100755 packaging/build_wheel.sh delete mode 100755 packaging/conda/build_csprng.sh delete mode 100644 packaging/conda/install_conda.bat delete mode 100755 packaging/conda/switch_cuda_version.sh delete mode 100644 packaging/pkg_helpers.bash delete mode 100644 packaging/torchcsprng/bld.bat delete mode 100644 packaging/torchcsprng/conda_build_config.yaml delete mode 100644 packaging/torchcsprng/meta.yaml delete mode 100644 packaging/vs2017/activate.bat delete mode 100644 packaging/vs2017/conda_build_config.yaml delete mode 100644 packaging/vs2017/install_activate.bat delete mode 100644 
packaging/vs2017/install_runtime.bat delete mode 100644 packaging/vs2017/meta.yaml delete mode 100644 packaging/vs2019/activate.bat delete mode 100644 packaging/vs2019/conda_build_config.yaml delete mode 100644 packaging/vs2019/install_activate.bat delete mode 100644 packaging/vs2019/install_runtime.bat delete mode 100644 packaging/vs2019/meta.yaml delete mode 100644 packaging/wheel/linux_manywheel.sh delete mode 100644 packaging/wheel/osx_wheel.sh delete mode 100644 packaging/wheel/relocate.py delete mode 100644 packaging/windows/azure-pipelines-ci.yml delete mode 100644 packaging/windows/azure-pipelines.yml delete mode 100644 packaging/windows/build_csprng.bat delete mode 100644 packaging/windows/cpu.bat delete mode 100644 packaging/windows/cuda101.bat delete mode 100644 packaging/windows/cuda102.bat delete mode 100644 packaging/windows/cuda92.bat delete mode 100644 packaging/windows/internal/auth.bat delete mode 100644 packaging/windows/internal/build_conda.bat delete mode 100644 packaging/windows/internal/build_wheels.bat delete mode 100644 packaging/windows/internal/check_deps.bat delete mode 100644 packaging/windows/internal/check_opts.bat delete mode 100644 packaging/windows/internal/clean.bat delete mode 100644 packaging/windows/internal/clone.bat delete mode 100644 packaging/windows/internal/copy.bat delete mode 100644 packaging/windows/internal/copy_cpu.bat delete mode 100644 packaging/windows/internal/cuda_install.bat delete mode 100644 packaging/windows/internal/dep_install.bat delete mode 100644 packaging/windows/internal/env_fix.bat delete mode 100644 packaging/windows/internal/nightly_defaults.bat delete mode 100644 packaging/windows/internal/publish.bat delete mode 100644 packaging/windows/internal/setup.bat delete mode 100644 packaging/windows/internal/test.bat delete mode 100644 packaging/windows/internal/upload.bat delete mode 100644 packaging/windows/internal/vc_env_helper.bat delete mode 100644 packaging/windows/internal/vc_install_helper.sh 
delete mode 100644 packaging/windows/internal/vs2017_install.ps1 delete mode 100644 packaging/windows/internal/vs2019_install.ps1 delete mode 100644 packaging/windows/internal/vs_install.bat delete mode 100644 packaging/windows/old/cuda100.bat delete mode 100644 packaging/windows/old/cuda90.bat delete mode 100644 packaging/windows/templates/auth_task.yml delete mode 100644 packaging/windows/templates/build_conda.yml delete mode 100644 packaging/windows/templates/build_task.yml delete mode 100644 packaging/windows/templates/build_wheels.yml delete mode 100644 packaging/windows/templates/linux_build_task.yml delete mode 100644 packaging/windows/templates/override_pytorch_version.yml delete mode 100644 packaging/windows/templates/publish_packages.yml delete mode 100644 packaging/windows/templates/publish_test_results.yml delete mode 100644 packaging/windows/templates/setup_env_for_msagent.yml delete mode 100644 packaging/windows/templates/setup_nightly_variables.yml delete mode 100644 packaging/windows/templates/upload_to_conda.yml delete mode 100644 packaging/windows/templates/upload_to_s3.yml delete mode 100644 packaging/windows/templates/vsts_auth.yml delete mode 100644 test/__init__.py delete mode 100644 test/test_csprng.py delete mode 100644 torchcsprng/__init__.py delete mode 100644 torchcsprng/__init__.pyi diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index acd6ffa..0000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,2458 +0,0 @@ -version: 2.1 - -executors: - windows-cpu: - machine: - resource_class: windows.xlarge - image: windows-server-2019-vs2019:stable - shell: bash.exe - - windows-gpu: - machine: - resource_class: windows.gpu.nvidia.medium - image: windows-server-2019-nvidia:stable - shell: bash.exe - -commands: - - checkout_merge: - description: "checkout merge branch" - steps: - - checkout - # - run: - # name: Checkout merge branch - # command: | - # set -ex - # BRANCH=$(git rev-parse --abbrev-ref HEAD) - # if [[ 
"$BRANCH" != "master" ]]; then - # git fetch --force origin ${CIRCLE_BRANCH}/merge:merged/${CIRCLE_BRANCH} - # git checkout "merged/$CIRCLE_BRANCH" - # fi - - designate_upload_channel: - description: "inserts the correct upload channel into ${BASH_ENV}" - steps: - - run: - name: adding UPLOAD_CHANNEL to BASH_ENV - command: | - our_upload_channel=nightly - # On tags upload to test instead - if [[ -n "${CIRCLE_TAG}" ]]; then - our_upload_channel=test - fi - echo "export UPLOAD_CHANNEL=${our_upload_channel}" >> ${BASH_ENV} - -binary_common: &binary_common - parameters: - # Edit these defaults to do a release` - build_version: - description: "version number of release binary; by default, build a nightly" - type: string - default: "" - pytorch_version: - description: "PyTorch version to build against; by default, use a nightly" - type: string - default: "" - # Don't edit these - python_version: - description: "Python version to build against (e.g., 3.7)" - type: string - cu_version: - description: "CUDA version to build against, in CU format (e.g., cpu or cu100)" - type: string - unicode_abi: - description: "Python 2.7 wheel only: whether or not we are cp27mu (default: no)" - type: string - default: "" - wheel_docker_image: - description: "Wheel only: what docker image to use" - type: string - default: "pytorch/manylinux-cuda101" - environment: - PYTHON_VERSION: << parameters.python_version >> - PYTORCH_VERSION: << parameters.pytorch_version >> - UNICODE_ABI: << parameters.unicode_abi >> - CU_VERSION: << parameters.cu_version >> - -jobs: - - binary_linux_wheel: - <<: *binary_common - docker: - - image: << parameters.wheel_docker_image >> - resource_class: 2xlarge+ - steps: - - checkout_merge - - run: packaging/build_wheel.sh - - store_artifacts: - path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - binary_linux_conda: - <<: *binary_common - docker: - - image: "pytorch/conda-cuda" - resource_class: 2xlarge+ - steps: - - checkout_merge - - run: - 
no_output_timeout: 20m - command: packaging/build_conda.sh - - store_artifacts: - path: /opt/conda/conda-bld/linux-64 - - persist_to_workspace: - root: /opt/conda/conda-bld/linux-64 - paths: - - "*" - - store_test_results: - path: build_results/ - - binary_win_conda: - <<: *binary_common - executor: windows-cpu - steps: - - checkout_merge - - run: - name: Build conda packages - no_output_timeout: 20m - command: | - set -ex - source packaging/windows/internal/vc_install_helper.sh - packaging/windows/internal/cuda_install.bat - eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - packaging/build_conda.sh - rm /C/tools/miniconda3/conda-bld/win-64/vs${VC_YEAR}*.tar.bz2 - - store_artifacts: - path: C:/tools/miniconda3/conda-bld/win-64 - - persist_to_workspace: - root: C:/tools/miniconda3/conda-bld/win-64 - paths: - - "*" - - store_test_results: - path: build_results/ - - binary_win_wheel: - <<: *binary_common - executor: windows-cpu - steps: - - checkout_merge - - run: - name: Build wheel packages - command: | - set -ex - source packaging/windows/internal/vc_install_helper.sh - packaging/windows/internal/cuda_install.bat - packaging/build_wheel.sh - - store_artifacts: - path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - store_test_results: - path: build_results/ - - binary_macos_wheel: - <<: *binary_common - macos: - xcode: "12.0" - steps: - - checkout_merge -# - run: -# name: Install libomp -# command: HOMEBREW_NO_AUTO_UPDATE=1 brew install libomp -# # Disable brew auto update which is very slow - - run: - # Cannot easily deduplicate this as source'ing activate - # will set environment variables which we need to propagate - # to build_wheel.sh - command: | - curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh - sh conda.sh -b - source $HOME/miniconda3/bin/activate - packaging/build_wheel.sh - - store_artifacts: 
- path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - binary_macos_conda: - <<: *binary_common - macos: - xcode: "12.0" - steps: - - checkout_merge -# - run: -# name: Install libomp -# command: HOMEBREW_NO_AUTO_UPDATE=1 brew install libomp -# # Disable brew auto update which is very slow - - run: - command: | - curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh - sh conda.sh -b - source $HOME/miniconda3/bin/activate - conda install -yq conda-build - packaging/build_conda.sh - - store_artifacts: - path: /Users/distiller/miniconda3/conda-bld/osx-64 - - persist_to_workspace: - root: /Users/distiller/miniconda3/conda-bld/osx-64 - paths: - - "*" - - store_test_results: - path: build_results/ - - # Requires org-member context - binary_conda_upload: - docker: - - image: continuumio/miniconda - steps: - - attach_workspace: - at: ~/workspace - - designate_upload_channel - - run: - command: | - # Prevent credential from leaking - conda install -yq anaconda-client - set -x - anaconda -t "${CONDA_PYTORCHBOT_TOKEN}" upload ~/workspace/*.tar.bz2 -u "pytorch-${UPLOAD_CHANNEL}" --label main --no-progress --force - # Requires org-member context - binary_wheel_upload: - parameters: - subfolder: - description: "What whl subfolder to upload to, e.g., blank or cu100/ (trailing slash is important)" - type: string - docker: - - image: circleci/python:3.7 - steps: - - attach_workspace: - at: ~/workspace - - designate_upload_channel - - checkout - - run: - command: | - pip install --user awscli - export PATH="$HOME/.local/bin:$PATH" - # Prevent credential from leaking - set +x - export AWS_ACCESS_KEY_ID="${PYTORCH_BINARY_AWS_ACCESS_KEY_ID}" - export AWS_SECRET_ACCESS_KEY="${PYTORCH_BINARY_AWS_SECRET_ACCESS_KEY}" - set -x - for pkg in ~/workspace/*.whl; do - aws s3 cp "$pkg" "s3://pytorch/whl/${UPLOAD_CHANNEL}/<< parameters.subfolder >>" --acl public-read - done - - unittest_linux_cpu: - <<: *binary_common - docker: - - image: 
"pytorch/manylinux-cuda102" - resource_class: 2xlarge+ - steps: - - checkout - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. - command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - - keys: - - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - - run: - name: Setup - command: .circleci/unittest/linux/scripts/setup_env.sh - - save_cache: - - key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - paths: - - conda - - env - - run: - name: Install torchcsprng - command: .circleci/unittest/linux/scripts/install.sh - - run: - name: Run tests - command: .circleci/unittest/linux/scripts/run_test.sh - - run: - name: Post process - command: .circleci/unittest/linux/scripts/post_process.sh - - store_test_results: - path: test-results - - unittest_linux_gpu: - <<: *binary_common - machine: - image: ubuntu-1604-cuda-10.1:201909-23 - resource_class: gpu.nvidia.small.multi - environment: - image_name: "pytorch/manylinux-cuda101" - PYTHON_VERSION: << parameters.python_version >> - steps: - - checkout - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. 
- command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - - keys: - - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - - run: - name: Setup - command: docker run -e PYTHON_VERSION -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh - - save_cache: - - key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - paths: - - conda - - env - - run: - name: Install torchcsprng - command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh - - run: - name: Run tests - command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/run_test.sh - - run: - name: Post Process - command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/post_process.sh - - store_test_results: - path: test-results - - unittest_windows_cpu: - <<: *binary_common - executor: - name: windows-cpu - steps: - - checkout - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. 
- command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - - keys: - - env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - - run: - name: Setup - command: .circleci/unittest/windows/scripts/setup_env.sh - - save_cache: - - key: env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - paths: - - conda - - env - - run: - name: Install torchcsprng - command: .circleci/unittest/windows/scripts/install.sh - - run: - name: Run tests - command: .circleci/unittest/windows/scripts/run_test.sh - - run: - name: Post process - command: .circleci/unittest/windows/scripts/post_process.sh - - store_test_results: - path: test-results - - unittest_windows_gpu: - <<: *binary_common - executor: - name: windows-gpu - environment: - CUDA_VERSION: "10.1" - PYTHON_VERSION: << parameters.python_version >> - steps: - - checkout - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. 
- command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - - keys: - - env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - - run: - name: Setup - command: .circleci/unittest/windows/scripts/setup_env.sh - - save_cache: - - key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - paths: - - conda - - env - - run: - name: Install torchcsprng - command: .circleci/unittest/windows/scripts/install.sh - - run: - name: Run tests - command: .circleci/unittest/windows/scripts/run_test.sh - - run: - name: Post process - command: .circleci/unittest/windows/scripts/post_process.sh - - store_test_results: - path: test-results - - unittest_macos_cpu: - <<: *binary_common - macos: - xcode: "12.0" - resource_class: large - steps: - - checkout - - designate_upload_channel - - run: - name: Install wget - command: HOMEBREW_NO_AUTO_UPDATE=1 brew install wget - # Disable brew auto update which is very slow -# - run: -# name: Install libomp -# command: HOMEBREW_NO_AUTO_UPDATE=1 brew install libomp -# # Disable brew auto update which is very slow - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. 
- command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - - keys: - - env-v3-macos-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - - run: - name: Setup - command: .circleci/unittest/linux/scripts/setup_env.sh - - save_cache: - - key: env-v3-macos-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - paths: - - conda - - env - - run: - name: Install torchcsprng - command: .circleci/unittest/linux/scripts/install.sh - - run: - name: Run tests - command: .circleci/unittest/linux/scripts/run_test.sh - - run: - name: Post process - command: .circleci/unittest/linux/scripts/post_process.sh - - store_test_results: - path: test-results - -workflows: - build: - jobs: -# - circleci_consistency - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.6_cu101 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_wheel: - cu_version: cu102 - name: binary_linux_wheel_py3.6_cu102 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu111 - name: binary_linux_wheel_py3.6_cu111 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.7_cu101 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_wheel: - cu_version: cu102 - name: binary_linux_wheel_py3.7_cu102 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - 
binary_linux_wheel: - cu_version: cu111 - name: binary_linux_wheel_py3.7_cu111 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.8_cu101 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_wheel: - cu_version: cu102 - name: binary_linux_wheel_py3.8_cu102 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu111 - name: binary_linux_wheel_py3.8_cu111 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.9_cu101 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_wheel: - cu_version: cu102 - name: binary_linux_wheel_py3.9_cu102 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu111 - name: binary_linux_wheel_py3.9_cu111 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - 
binary_win_wheel: - cu_version: cpu - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.6_cpu - python_version: '3.6' - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.6_cu101 - python_version: '3.6' - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.6_cu102 - python_version: '3.6' - - binary_win_wheel: - cu_version: cu111 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.6_cu111 - python_version: '3.6' - - binary_win_wheel: - cu_version: cpu - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.7_cpu - python_version: '3.7' - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.7_cu101 - python_version: '3.7' - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.7_cu102 - python_version: '3.7' - - binary_win_wheel: - cu_version: cu111 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.7_cu111 - python_version: '3.7' - - binary_win_wheel: - cu_version: cpu - name: binary_win_wheel_py3.8_cpu - python_version: '3.8' - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.8_cu101 - python_version: '3.8' - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.8_cu102 - python_version: '3.8' - - binary_win_wheel: - cu_version: cu111 
- name: binary_win_wheel_py3.8_cu111 - python_version: '3.8' - - binary_win_wheel: - cu_version: cpu - name: binary_win_wheel_py3.9_cpu - python_version: '3.9' - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.9_cu101 - python_version: '3.9' - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_wheel_py3.9_cu102 - python_version: '3.9' - - binary_win_wheel: - cu_version: cu111 - name: binary_win_wheel_py3.9_cu111 - python_version: '3.9' - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.6_cu101 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_conda: - cu_version: cu102 - name: binary_linux_conda_py3.6_cu102 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu111 - name: binary_linux_conda_py3.6_cu111 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.7_cu101 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_conda: - cu_version: cu102 - name: binary_linux_conda_py3.7_cu102 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu111 - name: binary_linux_conda_py3.7_cu111 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.8_cpu - python_version: '3.8' - wheel_docker_image: 
pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.8_cu101 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_conda: - cu_version: cu102 - name: binary_linux_conda_py3.8_cu102 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu111 - name: binary_linux_conda_py3.8_cu111 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.9_cu101 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_conda: - cu_version: cu102 - name: binary_linux_conda_py3.9_cu102 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu111 - name: binary_linux_conda_py3.9_cu111 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 -# - binary_macos_conda: -# cu_version: cpu -# name: binary_macos_conda_py3.9_cpu -# python_version: '3.9' -# wheel_docker_image: pytorch/manylinux-cuda102 - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.6_cpu - python_version: '3.6' - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: 
/v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.6_cu101 - python_version: '3.6' - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.6_cu102 - python_version: '3.6' - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.6_cu111 - python_version: '3.6' - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.7_cpu - python_version: '3.7' - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.7_cu101 - python_version: '3.7' - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.7_cu102 - python_version: '3.7' - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.7_cu111 - python_version: '3.7' - - binary_win_conda: - cu_version: cpu - name: binary_win_conda_py3.8_cpu - python_version: '3.8' - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.8_cu101 - python_version: '3.8' - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.8_cu102 - python_version: '3.8' - - binary_win_conda: - cu_version: cu111 - name: binary_win_conda_py3.8_cu111 - python_version: '3.8' - - binary_win_conda: - cu_version: cpu - name: binary_win_conda_py3.9_cpu - python_version: '3.9' - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: master - tags: - only: 
/v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.9_cu101 - python_version: '3.9' - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: master - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: binary_win_conda_py3.9_cu102 - python_version: '3.9' - - binary_win_conda: - cu_version: cu111 - name: binary_win_conda_py3.9_cu111 - python_version: '3.9' -# - python_lint -# - python_type_check -# - clang_format - - unittest: - jobs: - - unittest_linux_cpu: - cu_version: cpu - name: unittest_linux_cpu_py3.6 - python_version: '3.6' - - unittest_linux_cpu: - cu_version: cpu - name: unittest_linux_cpu_py3.7 - python_version: '3.7' - - unittest_linux_cpu: - cu_version: cpu - name: unittest_linux_cpu_py3.8 - python_version: '3.8' - - unittest_linux_gpu: - cu_version: cu101 - filters: - branches: - only: - - master - - nightly - name: unittest_linux_gpu_py3.6 - python_version: '3.6' - - unittest_linux_gpu: - cu_version: cu101 - filters: - branches: - only: - - master - - nightly - name: unittest_linux_gpu_py3.7 - python_version: '3.7' - - unittest_linux_gpu: - cu_version: cu101 - name: unittest_linux_gpu_py3.8 - python_version: '3.8' - - unittest_linux_gpu: - cu_version: cu101 - name: unittest_linux_gpu_py3.9 - python_version: '3.9' - - unittest_windows_cpu: - cu_version: cpu - name: unittest_windows_cpu_py3.6 - python_version: '3.6' - - unittest_windows_cpu: - cu_version: cpu - name: unittest_windows_cpu_py3.7 - python_version: '3.7' - - unittest_windows_cpu: - cu_version: cpu - name: unittest_windows_cpu_py3.8 - python_version: '3.8' - - unittest_windows_cpu: - cu_version: cpu - name: unittest_windows_cpu_py3.9 - python_version: '3.9' - - unittest_windows_gpu: - cu_version: cu101 - filters: - branches: - only: - - master - - nightly - name: unittest_windows_gpu_py3.6 - python_version: '3.6' - - unittest_windows_gpu: - cu_version: cu101 - filters: - branches: - only: - - master - - nightly - name: unittest_windows_gpu_py3.7 - python_version: '3.7' 
- - unittest_windows_gpu: - cu_version: cu101 - name: unittest_windows_gpu_py3.8 - python_version: '3.8' - - unittest_windows_gpu: - cu_version: cu101 - name: unittest_windows_gpu_py3.9 - python_version: '3.9' - - unittest_macos_cpu: - cu_version: cpu - name: unittest_macos_cpu_py3.6 - python_version: '3.6' - - unittest_macos_cpu: - cu_version: cpu - name: unittest_macos_cpu_py3.7 - python_version: '3.7' - - unittest_macos_cpu: - cu_version: cpu - name: unittest_macos_cpu_py3.8 - python_version: '3.8' -# - unittest_macos_cpu: -# cu_version: cpu -# name: unittest_macos_cpu_py3.9 -# python_version: '3.9' - nightly: - jobs: -# - circleci_consistency -# - python_lint -# - python_type_check -# - clang_format - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cpu_upload - requires: - - nightly_binary_linux_wheel_py3.6_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu101 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu101_upload - requires: - - nightly_binary_linux_wheel_py3.6_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu102 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - 
context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu102_upload - requires: - - nightly_binary_linux_wheel_py3.6_cu102 - subfolder: cu102/ - - binary_linux_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu111 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu111_upload - requires: - - nightly_binary_linux_wheel_py3.6_cu111 - subfolder: cu111/ - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cpu_upload - requires: - - nightly_binary_linux_wheel_py3.7_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu101 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu101_upload - requires: - - nightly_binary_linux_wheel_py3.7_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu102 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 
- - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu102_upload - requires: - - nightly_binary_linux_wheel_py3.7_cu102 - subfolder: cu102/ - - binary_linux_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu111 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu111_upload - requires: - - nightly_binary_linux_wheel_py3.7_cu111 - subfolder: cu111/ - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cpu_upload - requires: - - nightly_binary_linux_wheel_py3.8_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu101 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu101_upload - requires: - - nightly_binary_linux_wheel_py3.8_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu102 - python_version: '3.8' - wheel_docker_image: 
pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu102_upload - requires: - - nightly_binary_linux_wheel_py3.8_cu102 - subfolder: cu102/ - - binary_linux_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu111 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu111_upload - requires: - - nightly_binary_linux_wheel_py3.8_cu111 - subfolder: cu111/ - - binary_linux_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py39_cpu_upload - requires: - - nightly_binary_linux_wheel_py3.9_cpu - subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu101 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu101_upload - requires: - - nightly_binary_linux_wheel_py3.9_cu101 - subfolder: cu101/ - - binary_linux_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu102 - python_version: 
'3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu102_upload - requires: - - nightly_binary_linux_wheel_py3.9_cu102 - subfolder: cu102/ - - binary_linux_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu111 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.9_cu111_upload - requires: - - nightly_binary_linux_wheel_py3.9_cu111 - subfolder: cu111/ - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.6_cpu_upload - requires: - - nightly_binary_macos_wheel_py3.6_cpu - subfolder: '' - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.7_cpu_upload - requires: - - nightly_binary_macos_wheel_py3.7_cpu - subfolder: '' - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.8_cpu - 
python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.8_cpu_upload - requires: - - nightly_binary_macos_wheel_py3.8_cpu - subfolder: '' - - binary_macos_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_wheel_py3.9_cpu_upload - requires: - - nightly_binary_macos_wheel_py3.9_cpu - subfolder: '' - - binary_win_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cpu - python_version: '3.6' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cpu_upload - requires: - - nightly_binary_win_wheel_py3.6_cpu - subfolder: cpu/ - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu101 - python_version: '3.6' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu101_upload - requires: - - nightly_binary_win_wheel_py3.6_cu101 - subfolder: cu101/ - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu102 - python_version: '3.6' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - 
tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu102_upload - requires: - - nightly_binary_win_wheel_py3.6_cu102 - subfolder: cu102/ - - binary_win_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu111 - python_version: '3.6' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.6_cu111_upload - requires: - - nightly_binary_win_wheel_py3.6_cu111 - subfolder: cu111/ - - binary_win_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cpu - python_version: '3.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cpu_upload - requires: - - nightly_binary_win_wheel_py3.7_cpu - subfolder: cpu/ - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu101 - python_version: '3.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu101_upload - requires: - - nightly_binary_win_wheel_py3.7_cu101 - subfolder: cu101/ - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu102 - python_version: '3.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu102_upload - requires: - - nightly_binary_win_wheel_py3.7_cu102 - subfolder: cu102/ - - binary_win_wheel: - 
cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu111 - python_version: '3.7' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.7_cu111_upload - requires: - - nightly_binary_win_wheel_py3.7_cu111 - subfolder: cu111/ - - binary_win_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cpu - python_version: '3.8' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cpu_upload - requires: - - nightly_binary_win_wheel_py3.8_cpu - subfolder: cpu/ - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu101 - python_version: '3.8' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu101_upload - requires: - - nightly_binary_win_wheel_py3.8_cu101 - subfolder: cu101/ - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu102 - python_version: '3.8' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu102_upload - requires: - - nightly_binary_win_wheel_py3.8_cu102 - subfolder: cu102/ - - binary_win_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu111 - python_version: '3.8' - - binary_wheel_upload: 
- context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.8_cu111_upload - requires: - - nightly_binary_win_wheel_py3.8_cu111 - subfolder: cu111/ - - binary_win_wheel: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cpu - python_version: '3.9' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cpu_upload - requires: - - nightly_binary_win_wheel_py3.9_cpu - subfolder: cpu/ - - binary_win_wheel: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu101 - python_version: '3.9' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu101_upload - requires: - - nightly_binary_win_wheel_py3.9_cu101 - subfolder: cu101/ - - binary_win_wheel: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu102 - python_version: '3.9' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu102_upload - requires: - - nightly_binary_win_wheel_py3.9_cu102 - subfolder: cu102/ - - binary_win_wheel: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu111 - python_version: '3.9' - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_wheel_py3.9_cu111_upload - requires: - - 
nightly_binary_win_wheel_py3.9_cu111 - subfolder: cu111/ - - binary_linux_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cpu_upload - requires: - - nightly_binary_linux_conda_py3.6_cpu - - binary_linux_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu101 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu101_upload - requires: - - nightly_binary_linux_conda_py3.6_cu101 - - binary_linux_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu102 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu102_upload - requires: - - nightly_binary_linux_conda_py3.6_cu102 - - binary_linux_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu111 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu111_upload - requires: - - 
nightly_binary_linux_conda_py3.6_cu111 - - binary_linux_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cpu_upload - requires: - - nightly_binary_linux_conda_py3.7_cpu - - binary_linux_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu101 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu101_upload - requires: - - nightly_binary_linux_conda_py3.7_cu101 - - binary_linux_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu102 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu102_upload - requires: - - nightly_binary_linux_conda_py3.7_cu102 - - binary_linux_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu111 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu111_upload - requires: - - nightly_binary_linux_conda_py3.7_cu111 - - 
binary_linux_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cpu_upload - requires: - - nightly_binary_linux_conda_py3.8_cpu - - binary_linux_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu101 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu101_upload - requires: - - nightly_binary_linux_conda_py3.8_cu101 - - binary_linux_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu102 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu102_upload - requires: - - nightly_binary_linux_conda_py3.8_cu102 - - binary_linux_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu111 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu111_upload - requires: - - nightly_binary_linux_conda_py3.8_cu111 - - binary_linux_conda: - cu_version: cpu - filters: - 
branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cpu - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cpu_upload - requires: - - nightly_binary_linux_conda_py3.9_cpu - - binary_linux_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu101 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu101_upload - requires: - - nightly_binary_linux_conda_py3.9_cu101 - - binary_linux_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu102 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu102_upload - requires: - - nightly_binary_linux_conda_py3.9_cu102 - - binary_linux_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu111 - python_version: '3.9' - wheel_docker_image: pytorch/manylinux-cuda111 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.9_cu111_upload - requires: - - nightly_binary_linux_conda_py3.9_cu111 - - binary_macos_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: 
/v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.6_cpu_upload - requires: - - nightly_binary_macos_conda_py3.6_cpu - - binary_macos_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.7_cpu_upload - requires: - - nightly_binary_macos_conda_py3.7_cpu - - binary_macos_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.8_cpu_upload - requires: - - nightly_binary_macos_conda_py3.8_cpu -# - binary_macos_conda: -# cu_version: cpu -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_macos_conda_py3.9_cpu -# python_version: '3.9' -# wheel_docker_image: pytorch/manylinux-cuda102 -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_macos_conda_py3.9_cpu_upload -# requires: -# - nightly_binary_macos_conda_py3.9_cpu - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: 
nightly_binary_win_conda_py3.6_cpu - python_version: '3.6' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cpu_upload - requires: - - nightly_binary_win_conda_py3.6_cpu - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu101 - python_version: '3.6' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu101_upload - requires: - - nightly_binary_win_conda_py3.6_cu101 - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu102 - python_version: '3.6' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu102_upload - requires: - - nightly_binary_win_conda_py3.6_cu102 - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu111 - python_version: '3.6' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.6_cu111_upload - requires: - - nightly_binary_win_conda_py3.6_cu111 - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cpu - python_version: '3.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cpu_upload - requires: - - 
nightly_binary_win_conda_py3.7_cpu - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu101 - python_version: '3.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu101_upload - requires: - - nightly_binary_win_conda_py3.7_cu101 - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu102 - python_version: '3.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu102_upload - requires: - - nightly_binary_win_conda_py3.7_cu102 - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu111 - python_version: '3.7' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.7_cu111_upload - requires: - - nightly_binary_win_conda_py3.7_cu111 - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cpu - python_version: '3.8' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cpu_upload - requires: - - nightly_binary_win_conda_py3.8_cpu - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu101 - python_version: '3.8' - - binary_conda_upload: - context: 
org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu101_upload - requires: - - nightly_binary_win_conda_py3.8_cu101 - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu102 - python_version: '3.8' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu102_upload - requires: - - nightly_binary_win_conda_py3.8_cu102 - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu111 - python_version: '3.8' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.8_cu111_upload - requires: - - nightly_binary_win_conda_py3.8_cu111 - - binary_win_conda: - cu_version: cpu - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cpu - python_version: '3.9' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cpu_upload - requires: - - nightly_binary_win_conda_py3.9_cpu - - binary_win_conda: - cu_version: cu101 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu101 - python_version: '3.9' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu101_upload - requires: - - nightly_binary_win_conda_py3.9_cu101 - - binary_win_conda: - cu_version: cu102 - filters: - branches: - only: 
nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu102 - python_version: '3.9' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu102_upload - requires: - - nightly_binary_win_conda_py3.9_cu102 - - binary_win_conda: - cu_version: cu111 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu111 - python_version: '3.9' - - binary_conda_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_win_conda_py3.9_cu111_upload - requires: - - nightly_binary_win_conda_py3.9_cu111 diff --git a/.circleci/unittest/linux/scripts/environment.yml b/.circleci/unittest/linux/scripts/environment.yml deleted file mode 100644 index ca96279..0000000 --- a/.circleci/unittest/linux/scripts/environment.yml +++ /dev/null @@ -1,15 +0,0 @@ -channels: - - pytorch - - conda-forge - - defaults -dependencies: - - numpy - - pytest - - pytest-cov - - codecov - - pip - - ca-certificates - - pycrypto - - pip: - - future - - scipy diff --git a/.circleci/unittest/linux/scripts/install.sh b/.circleci/unittest/linux/scripts/install.sh deleted file mode 100755 index 6334cb9..0000000 --- a/.circleci/unittest/linux/scripts/install.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -unset PYTORCH_VERSION -# For unittest, nightly PyTorch is used as the following section, -# so no need to set PYTORCH_VERSION. -# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. 
- -set -e - -eval "$(./conda/bin/conda shell.bash hook)" -conda activate ./env - -if [ "${CU_VERSION:-}" == cpu ] ; then - cudatoolkit="cpuonly" -else - if [[ ${#CU_VERSION} -eq 4 ]]; then - CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}" - elif [[ ${#CU_VERSION} -eq 5 ]]; then - CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}" - fi - echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION" - version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")" - cudatoolkit="cudatoolkit=${version}" -fi -printf "Installing PyTorch with %s\n" "${cudatoolkit}" -conda install -y -c pytorch-nightly pytorch "${cudatoolkit}" - -printf "* Installing torchcsprng\n" -python setup.py develop \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/post_process.sh b/.circleci/unittest/linux/scripts/post_process.sh deleted file mode 100755 index b05be6d..0000000 --- a/.circleci/unittest/linux/scripts/post_process.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -set -e - -eval "$(./conda/bin/conda shell.bash hook)" -conda activate ./env - -codecov \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/run_test.sh b/.circleci/unittest/linux/scripts/run_test.sh deleted file mode 100755 index 61f6e3e..0000000 --- a/.circleci/unittest/linux/scripts/run_test.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash - -set -e - -eval "$(./conda/bin/conda shell.bash hook)" -conda activate ./env - -python -m torch.utils.collect_env -pytest --cov=torchcsprng --junitxml=test-results/junit.xml -v --durations 20 test \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/setup_env.sh b/.circleci/unittest/linux/scripts/setup_env.sh deleted file mode 100755 index 054ebf2..0000000 --- a/.circleci/unittest/linux/scripts/setup_env.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash - -# This script is for setting up environment in which unit test is ran. -# To speed up the CI time, the resulting environment is cached. 
-# -# Do not install PyTorch and torchcsprng here, otherwise they also get cached. - -set -e - -this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -root_dir="$(git rev-parse --show-toplevel)" -conda_dir="${root_dir}/conda" -env_dir="${root_dir}/env" - -cd "${root_dir}" - -case "$(uname -s)" in - Darwin*) os=MacOSX;; - *) os=Linux -esac - -# 1. Install conda at ./conda -if [ ! -d "${conda_dir}" ]; then - printf "* Installing conda\n" - wget -O miniconda.sh "http://repo.continuum.io/miniconda/Miniconda3-latest-${os}-x86_64.sh" - bash ./miniconda.sh -b -f -p "${conda_dir}" -fi -eval "$(${conda_dir}/bin/conda shell.bash hook)" - -# 2. Create test environment at ./env -if [ ! -d "${env_dir}" ]; then - printf "* Creating a test environment\n" - conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" -fi -conda activate "${env_dir}" - -# 3. Install Conda dependencies -printf "* Installing dependencies (except PyTorch)\n" -conda env update --file "${this_dir}/environment.yml" --prune diff --git a/.circleci/unittest/windows/scripts/environment.yml b/.circleci/unittest/windows/scripts/environment.yml deleted file mode 100644 index ca96279..0000000 --- a/.circleci/unittest/windows/scripts/environment.yml +++ /dev/null @@ -1,15 +0,0 @@ -channels: - - pytorch - - conda-forge - - defaults -dependencies: - - numpy - - pytest - - pytest-cov - - codecov - - pip - - ca-certificates - - pycrypto - - pip: - - future - - scipy diff --git a/.circleci/unittest/windows/scripts/install.sh b/.circleci/unittest/windows/scripts/install.sh deleted file mode 100644 index deba8f6..0000000 --- a/.circleci/unittest/windows/scripts/install.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -unset PYTORCH_VERSION -# For unittest, nightly PyTorch is used as the following section, -# so no need to set PYTORCH_VERSION. -# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. 
- -set -e - -this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" -conda activate ./env - -if [ "${CU_VERSION:-}" == cpu ] ; then - cudatoolkit="cpuonly" -else - if [[ ${#CU_VERSION} -eq 4 ]]; then - CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}" - elif [[ ${#CU_VERSION} -eq 5 ]]; then - CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}" - fi - echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION" - version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")" - cudatoolkit="cudatoolkit=${version}" -fi -printf "Installing PyTorch with %s\n" "${cudatoolkit}" -conda install -y -c pytorch-nightly pytorch "${cudatoolkit}" - -printf "* Installing torchcsprng\n" -"$this_dir/vc_env_helper.bat" python setup.py develop \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/install_conda.bat b/.circleci/unittest/windows/scripts/install_conda.bat deleted file mode 100644 index 6612fba..0000000 --- a/.circleci/unittest/windows/scripts/install_conda.bat +++ /dev/null @@ -1 +0,0 @@ -start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/post_process.sh b/.circleci/unittest/windows/scripts/post_process.sh deleted file mode 100644 index 2a1ac63..0000000 --- a/.circleci/unittest/windows/scripts/post_process.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -set -e - -eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" -conda activate ./env - -#codecov diff --git a/.circleci/unittest/windows/scripts/run_test.sh b/.circleci/unittest/windows/scripts/run_test.sh deleted file mode 100644 index 02c6327..0000000 --- a/.circleci/unittest/windows/scripts/run_test.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash - -set -e - -eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" -conda activate ./env - -python -m 
torch.utils.collect_env -pytest --cov=torchcsprng --junitxml=test-results/junit.xml -v --durations 20 test \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/setup_env.sh b/.circleci/unittest/windows/scripts/setup_env.sh deleted file mode 100644 index 6a73927..0000000 --- a/.circleci/unittest/windows/scripts/setup_env.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash - -# This script is for setting up environment in which unit test is ran. -# To speed up the CI time, the resulting environment is cached. -# -# Do not install PyTorch and torchcsprng here, otherwise they also get cached. - -set -e - -this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -root_dir="$(git rev-parse --show-toplevel)" -conda_dir="${root_dir}/conda" -env_dir="${root_dir}/env" - -cd "${root_dir}" - -# 1. Install conda at ./conda -if [ ! -d "${conda_dir}" ]; then - printf "* Installing conda\n" - export tmp_conda="$(echo $conda_dir | tr '/' '\\')" - export miniconda_exe="$(echo $root_dir | tr '/' '\\')\\miniconda.exe" - curl --output miniconda.exe https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -O - "$this_dir/install_conda.bat" - unset tmp_conda - unset miniconda_exe -fi - -eval "$(${conda_dir}/Scripts/conda.exe 'shell.bash' 'hook')" - -# 2. Create test environment at ./env -if [ ! -d "${env_dir}" ]; then - printf "* Creating a test environment\n" - conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" -fi -conda activate "${env_dir}" - -# 3. 
Install Conda dependencies -printf "* Installing dependencies (except PyTorch)\n" -conda env update --file "${this_dir}/environment.yml" --prune \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/vc_env_helper.bat b/.circleci/unittest/windows/scripts/vc_env_helper.bat deleted file mode 100644 index 9410135..0000000 --- a/.circleci/unittest/windows/scripts/vc_env_helper.bat +++ /dev/null @@ -1,39 +0,0 @@ -@echo on - -set VC_VERSION_LOWER=16 -set VC_VERSION_UPPER=17 - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere -if "%VSDEVCMD_ARGS%" == "" ( - call "%VS15VCVARSALL%" x64 || exit /b 1 -) else ( - call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 -) - -@echo on - -set DISTUTILS_USE_SDK=1 - -set args=%1 -shift -:start -if [%1] == [] goto done -set args=%args% %1 -shift -goto start - -:done -if "%args%" == "" ( - echo Usage: vc_env_helper.bat [command] [args] - echo e.g. 
vc_env_helper.bat cl /c test.cpp -) - -%args% || exit /b 1 diff --git a/.github/csprng_architecture.png b/.github/csprng_architecture.png deleted file mode 100644 index 3697c382ab74cafb70de5e961b7a56e3279588f9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 92773 zcmd432{hH+7e8!@E8Tgd_f(~<71u5%)IfKS?HS~bliEH^!x>F z`^S9zq>tg_hEJsxAIT90yL9qobG9jWv1{1l1>Vl_voL(07U>t`8-tCD%iaOcktFbK ze5zqhFT?3gi0l9920s352&CxsRbt}dVJa$STX>V^EFbSWZln9~OTVTJASCFZRo+sYo<&u=ZF?f~ZlJ#Xz*$ z=DLaxR8+98fzJ>u>>x`lJn#t{{9FbWxW7JQ&tSeN zeMeSN5&VD0%+cK3*6E?$<7VQ%C*V~BmYTYcbybu_&FpNrP43&7nscLU>@iKS5GYab z(Z>9-2?NT;+SW-FCC>QQH$=f_%%6D}8UFgpV=Hk+T@`f(SvyB_hU?sd+`Nnu=NT9n z5RUgBh~7rZ|J@w?CeHZq@nd^Y9v)X$S8i7UZaYT{9zGEf5guNC9)5l<@C_~}H`~W1 zC@xzkrvJU<*L#rWPG*jl_Kz*?Y#A``H8Hhwek{(&i0SB`fBze&Im+^PPqt2fPYXPPPG4=h|PTrFJ2wh&dly#@vb z$J%PH0|Rr<(}{J+_uZ&Hwq{amW+n}yyi zQmu}ee@Q2?K9nPWa6>fzm*k|@pj5YY4~7P6<87-qnnx{*xMF_KKam&jjcRK0UhgkS z&#U3cp1-x+k56MAeR|+TX;b=J&{8~Pi9$B_c!b)86#rh7ZftBPPG_w4jI)FIEkz!v zy6zL-fIEMFdIdaP#EO?=`8C$x;{vZRWOz}yjYE|6Yh=IQig}wTc=`>SGfe5<+VS_7 zs->h%7S2(q{u<`LJ9rG9zQxt0i2GN6zxqM>`ZV+qaX$aEmdyM3G~6aADQfTyB9<{E$0UtdZHJ7-OO!__}OB>_+Wzb|lNh5vhs6$czO$>YbByQA*C z$Gc1DSxFYQbz$iqo2`$R-j%;esGeq7xKx0WOLk~(eJdt$xLmy8OF;LT zbMCoQXUTj<*qfvW$!}ZS$7V6hO6841DtTmIdAM5TBx`uGnZVZhGWqxP>zi14MdJJk z`?C@ZT<#7%NU`Z@H{5x@eK1fbQL~l0){zdmCARv(ta2t^FL`z2j+%HJiNxOcc3F81 zSD2zzUXlEhMw-!R3Vr*J+iAPiY^iRuNu^7MPZl!oR?F1q9O}7zH}74{El4AgJX(Kz zxKd7XTM%n7*8##`v9J%;T6VJcC$--_s0uSsPW9NKteLLMsKSew^y!u<6yIFu98sLb~_1V8V3E=U7+kBV^Tu_ia(6Aw|Jw0B_ z($X=we!QI}d8Op2Q*2F{_;u~k3{O^yU9DU9CPzUv|4QXjzUA(SE4?7QvCsFm^}YSx z^pn`yy-z%Ew6yQC9M9gUqgJPt&W+PJz2f>KrW44lq?I-0^)ppbOR0_9HxEAXc*WEn zj;xXB8hU|6vAA7Qu1e)MmFB*h1g#p^vd}T|bRM!u_v|?wJ3ZO%@;)if58et?93LJ# zak*!i8P`+beY|ASKmN!}dOX=GU(wF3U9ZQX<;o3;6q*E^Cbze$c+?5_KR?PBRIOHW zGOyH~p6ow2u;HKE7;zcqvMKp0AF*X0>$d$>!OXFnZO*#0G{MNr?V!U#;#pLU`)Wm& zMt(K^x6}cebLr{zMAxme-)nTL{oY%?JA~| zLG04%j#um!=3hmrI(6_V7@ovLc`zF|_i5qoU`rEj5FGxPlSrTv9(5itaI?2R*`KqB 
zVu?=$J80ihu-Dn;Xrsl#gvcSRj{amlXhCo6Kzqnz>kG}Nq}sLG!^>l4IBVC+^iIBZ zZgq&nZI{~xs-n-@pX=3gGI3ra{b-u4ccAX%r#{Q@A?Dcd+N~O!v5y?K zt>P*_w?WdFOUQHWojcw(!#rL4*SwIid3$fOSKqmp=p=ET(+}LWNNsl5c&p}lEjDlejfWS@%I>K9k4Oa>%;)i{s4!vC)l4#A&xIsR~$i zm$ccpTsKy!a4xCEHX3ecP-YI)tU7FdqTM>`&6i@sM&O4xEk)W?y3GmCX+kxy& zy%tF7w0jBi5^RoCS~eS>D7)Yn(eoQYd^hj9&BW!(ZMiOHd7_aWyp6=A=8F$bmMtW$ z&^ypod0~Yla`X=D^38`GR)$;2kH>upSgRBAKTL*hzqwKKHeYEDW2q9?@lJp)zKQZa znu<7{mX_P+^*x71G7WW!F6Divi6x*wF*tl-w^^;2>r5-1sZN}&?r>Q*ocR&2vyR!I=dB8SlrHC_1Y{_gf)heQ0)y%f<8YCpK`*) zqd0t9GD>-}7229Uvi#&(JUKGLOH@%2ocu(e1E= zek~&>rp5pDRIm-o#&celK8M2cAXQyFoNX3`ip#j|oC!1s9%e&qz5Zwssrv-z9C<|{ zgz&lRsGAdt!9=bk$(*=^Lf6gz74G%6*l3Ivp$beoNJJ~6pvmx)<%F(HL1oiIsm8^rcF>L1-Hk_~E$st_- z-jZTlc{L^|aO}#c8*%&w|7#C;?{1=MpPlHee*OmmyHe9kuHIf3ihVt20bRq?fJpUNiW67|v4adliQsQxjGZ!LZliN--+7PH;bn!LJZRqiAycg5ayc zw}P38_m-NjlIb!*_~vurLztD*4bqfrJ-QU6yrl~pBvKMT+LYlF8bsgJEgG-MaU(hCdcx*g$u;ttY;cXu61>Ok8L}-C|L} z^+p8$ZqyEMH+SQC*!MmETaa6jfI=czN#2?n{h{uvYg2UL%OT2{MG4uxJlilKa>kIt z^#wKWCq9Ku63ZpMs%}1p@5oJjrUItdOtMm4?w!=ERBv@I1k%`M&or&Uq{~WF%l!vC z92C6XQ+3}Ng9p(*C76W}642U9?KE+ellLz3xP5^>;5}d_Hi`3kMT|p+U+f7^#d<#2 zjEE4l;g&Cf1Fr4Fb!zjeXS{=(gTedG>9mdMdrfSQwhq-#{OOO3DI$h1Dd!B2+a%Gh zQ5vF543Vd3_>YFl)m;_p19}ZSjwn}zawE9_&(T<2m)A~lNHkxhO=P-uAgVcx5+au` zfOHzVGRca=HO3ctp-kgCQtH4n+X=n|*A5Jjq94mmXYPEE4(ozhh?U-cOyBAJx_9NS z;cwscg#gfk)Q!cE9`g(v1XCct?IJ8*Y06Rrmm7bd`2PCsLC1EkE#ttf5k6fu&ip%T zzK&w(f`AFnZz1sU=#uMsNs0ba@(>z0IaLF`9};EWbkL`k)*rO7^U{U*YYw?tH;un4 zen0UxHJY5R;n|_+R0txZi4TveK?;hquEg*ys`?&EkK!=#_9AZI% zSjvOZRcr8OS)+shSBXWE3Ez|R8X?FzW?KcwG4P?D2S^V2TTtO@;1N9S+G=J;N{`m> z=DXH#xPMo_Awx3iLSuGHpSXi8C#e;VfxJA$Km|A4YBdTFt0}PAg4*p(T_~z zIQ*!$JfHRXXKzmJB0t)uDG?ecJUb{q2%OKvol*C-qqwOAr)zLiG;L!H_{1&)rwiS1 zm%f<~qCd$w)9|e(^}s!f3^G5cp=Ncq&+#XskS!9Mx9wKw>?SwzjmV%2+hPs8E!P;w zfjQfMDU?oqCwK}ws_A83Wu0ssPr~Umo1~@3r(BgYmSdg^=uAx?y-n#GWk2!fS#?FQ zG?y(wI#{4DHEt-R$P7zKxT9+9com!Oa*^$R>v*n57}*$gwm@NX3VZlLj{MVQs>DzH z_&w!gUhWif4=DrLKVcOl!H34*HTfh-Gi0a~cZX-=YA}Yczp$mDm9aN5j$o655bIeN z$K3qA>63<0TYTpf$wwG 
zz4ogM@+C{WE+df*>ZqH$S|KA{H)_l+SPAj~99x!}3G*CHGK(@uVi?o`DjN(}zDE2J zHzr|Byy4Es#ufAZ*UHbCpa!t)hMi%%I$GP$u0tbl3!6kPOt1`{XF~J-nN@2@HYyX7C+73(`{Q2A2Wu@l3|){57a z89kE5=uEli$ndgytRkJEagZzF`?O4ukHIFiOAaI%)&fkiBvv}HcG{#{CI*JGi7n1P zPJ(tv;Zt!_(k6tWzvDQdF#2`_u& zI$embTCp<~Rbbpp3ce^0J``K5cQDh@gs%_#KpQl|l%5I^l-(p6e2sV~H&ECg3(fcN z0v}5ooSTomsZBjrS5-1TgKVoE`+7xkVT>)o8Xp=OF2w&(9%by)&QQwT!8)2pX{u~k z(JG1HR&voI8MY}mprE-cjxcJ=Ssc>g=E;KT%TU3a3XfEZ)X;bPsYkC-wHREz#z#t8 z53PbC7Q@X-O5Tqt$uo_ROkT>sZJ{4!33e$-^2(^o34~sx@Pv*nNr13=8$p9zcrh-q z)gclq^kd?YvMxGv;x0N%m1?FH7p<-nYFku&5K^d`e&unuG%k{*&7lKLX_h-Mq_Z?F z#zjni!w-8h_n{v%SZ;(eM+e=Y~^s}EjHYz?AyA>bkD26tp? z<&)=ZzoSTA3A}CH%k-;EL)T0+OeTEE#>9^AV>KEFNj4~StOXsNU!zqiX_H7AB%2U!f8SYKs;|sH{^xd_Zj42$9QSRhbPD zK_pYtDVnFfdQ@?9I*d|zM56lVJG*!JPsK}3Sx3cVaIej7gsl6r5h{A+2ENd^UPe!W z1E(`FY5cBg0Ack%T7O|CRzmvKM-o}-$Z%!Yc83}+P5iw52SOw5BMT0s<>B;xh`C39 zX@S0>Y9l|fbjhRO$P#$ut$3#HfNUw6hTZ~w$|9_(L{>_*s}$X8&MhSVmWxcU327Ew z_B`VZ`MxR6-+_3S5Mjqp`7(EB4p5t$(I?tIa7Y{9z1`yLq)F9#v5> zhwz3DXc(9-WxqONM)Pb+eWjl;|HPJN4<{}yrZ=xa?FYa)bpl-DKXQaSnf0~V1ft|` zvYA*xEKn5kx>sy2ynK^vlzWe7Y-79Wv#(Ad^K4SrDHFDYVHSdcAwj<{M~1zM5)9-@=h^FRz@1;`74Bp-UmMneRK*aN$AiB>+vrZtG zQ`SURl^uV0=Bm(yUGS|HRuTm)2E2fE_>l6R(n8T4+jOD8q5>@w!kE{|u@t0X4=Q3Z z^7n`}vcDS#W0QNs)e8-;?rD}^`Pt0oX;doOmD_XYS{tyMTG|>sI9FO58xI+0T@R?~ z#NKVXPki?|H|I$6Wk@GJ#Ih;2P}GalOfR!gGgJe;^NA)mBW;mw)WLk@>&_z)!*;`= zo8`UB5mB^GC_jeF2`xqyD=u$fYBIVR^vQ0na=`uv-$aeVdT3t0KRVfCB90ovGZaQU z#hL1V(>O+=na6a|CfW&6+hTC@>CoI+9gcHxFL$brYG8C>h3LX&8VWacsZJJR?A-4~ z8mk0u?S-4P+T0e&V4 zJjIuMy&2iamysr92|;~?l|>zA^9vK0EH_=bgF){0Nr<#TF22K7)VN;)o|@+dF@s=4 z)->~HF7o>cKMSuk+%}EwtbeDfJGtP>f&C({IMotAmG}eXl38KsXC&D;|F_p#;>)Ba z*NBg9O6kuUTpgGS@uKq6_<3oPxVE8~sSVCh!oeB9CQV&Q^>aN$N6lGElO!IpW@+BH z&f8Yj{_yqiTN*U~V1NnAkO2x+YRw%`gs6F!kdHQbHPx1$ztPl4VkV}QYh_fb?7<^< z%Msdl0g?MsGgCJR%P;6AM=nX^@Jd#UZC0hh&}RpQL~UxuPix6zVs=xFP6766*6eCg zxA#77LG7Mv%R6rII%dR}Eo6jox24H3-*4GdVX$_h4OqXpq$#a^9gW~s{3BMz#`R5>rpX+G$ykHx?agFgU=cxEJRgzVHMRFNOS|C#o 
zdH`=i79vMXyuN;wpwL{7See>>Q&V(vE4E$1Dj8jZp5n7_GV?(QnDftT56e*ruYQ<% z?;|flMm9j*K;|!lANfsfbEpy;Q|UaML^;a%&1jA<&{eIkEn1{OGNZ1|kvM~jD)PRM z;N{_uVw^}8GVNz$Z!Xd?dSv}bc+k7C!;93|9Ep|D%_E^|&De%5G)A*Im_5Z_%_ib0AxI`ysGIJeO@`9uGFYA~F6DeRiwT&gCQ*z>h%DAOD)kT+0$umk#lNk4i_E?2@vTVceb zlU#owzh-Bs@>7J{jUV>1W|eUXISBt z)mp8)AszvOyR9&LQIwd>?aGrX`PIb2%I?COZM5cloMR}>+IJjn*O=1`BZ_H(u z*)pZeFr0opacNtti>{W5j^Xio6~`{u;=?&7!LmCf4KQ3KjSwL@(}!Y5HE{GGXp=iHx6$p)%DlrQQfGUMZ@DGYRro`GGSH&2l~5mf>zQI~lOSatL!HAZ zrp6t)^R0){+)VgPXnHMK$Fngr|b7GEQ9lsWuK{cm$N zvEVDBu^5>*Dy^!;P}hb1n?7gy4@f&p8IF*c$`NOvW`q$7l=#)m1gy$gk5HcDHNHh( zin}DLH1O0|MrY;3iWX@;Ts}9PNENYy&LVX)XwdU}V~^k~440PJQgUFF`3{xduzra! zm8P3Cx7%T(7?%e|%x->tkeaH0&Ap@DkcBl@% z`4%WlCi}C1=+VH+O01zhssjzjvJYp(3*ciX4TDOfFCN^Gd8Y&q_6jLvT9j1|M-HNd zHfS3B+3*NAgJTB8y#}^z(D@p4JXsF{2Z$=%cz0e|bJP{_Bl9n}4d!)L?ypBVvB>D7 z%Qh)Wq0F!YS)VMD&Q~7;)*SL5TxJp!>5p(ydLDFAhg>?Py;HP?x)tXZJKh>b#9IcN zytQ;$%qY}eKP??LOE|`IKY9$ylx_~E^A2qGv)yI8VcWZ{;bu{030shs1^`&Z$#M?p zJADb{4R_6F{p}idd*TE(IHxZp_I*|nys=)rAB>pk}4xl_x5XSIXpts_qzTigizCj>ysU-c*!bwR8S=C5ROXN8h|S%3ys1eX+rgN|4U+Gh76byKIE6k9iX9 z>@J2T1L?9P>MM3SQz#}p$i%rR;qzVeGO-O)Ofp~R&1U(aZ@OSs&{*RpJt7!TekBsg zy^o~tbz^Su`gs!{5%!7gPd_71yL!}jHwl$E1?3@>xxLRd8RthuW+<2!Wub`b?CTro zthTiysC8I|?>YQL%DjY%29o>9cKl%3_qSSQP`)+;TUK|RDXkV%TndnLNfA*;84Tg zFPH8&N%`1Qg!rtEc*aYR;~^6k2kb~196{H(7uE?2F1Pv8Te_)X%Ct8@wkN* zDmxDu4+e*ol(D1P>hsQg4D!(sj%PRm^|Noinemzr;UN1LLOf1A7;mci?auDq9kq&6 zrthk$1SE!!A#QxpnO}l0c?3}t_Lx!c7dJo`A-?CseEIh8KvQIHA`uW0ltO)J5~YE` zVa7?N8WoHzGz;e3tJ;Kx$JX=?B7W_V@C5BWQ7Y|*e0p1x5q&}9!{#Ev*toi=_na9Ts1~Rn zbqky>Rk>xOy-ZfqbPQqvDWv2^J1RC%LAyO7CX zC(W!W&5Rr4Z0S}AZ!r5UnLW`d(FPIPg7Jb=ju6n>z;m zvcfQb*CAQM%bVZAo5!UaP za*VWpD>iH5uXAY<&+rB;hsq{QO{N}-Z7Eq-1Gy45Gjg!p?=r{}$${{ICT;9lD(H_N z)zcdaY|J(~j?K$9UGoyNr&7|L5`Jm7ur+bgKTk90|sMG4SuFQNAzj@n3oBT3E zFZ#mq!D&%pP04TClO234J_B$%*DuIHKAUiwU&%6KT}*rx@Fk9!R}g!18~Oe*0x5$G zz3$e2MQsR{N)=rukR9cGyv&V_*o0U#6cZPG=($-PsY9>*geT7YdAw8>9^@HI*X0TK znEI+-QBEG;5I}mcWZcd;{2v$MS*njnJnJEB{3Pl!b~Obf@_dn>J>2o-I9!IEDub-7 
zS5@rlwTOE>B|3a|oiayENrIk%kl@{Oc884Nq)Z+hn=++3xtYwo(O4qc>$6PvBRmYL z+`b9^)(C9EZ2>@c@LClg9__`0_`~UmLb3=iTyy$}Y;3*TsQ0;KgGH*xXj2*%UkboU zl1IdD);E<$Z4Gm!FA2-ybdis9xiN2%OwHg;v;M8GcN3{XC4frDwjV^uh0``BLC@6n(>q z4Evih;wfX?EMb}TCHA+6|BKWC;JV2_<2C=j=(Y6%Ca)YB$5 zi5QnL&)t$Aj_nr)xpBv@_vV2}6-V3Ie#$C1ie!Cu*^z}gX>~+gNEgcp5!@(B((o-U% zJ(i4*um0s`xIfI(X6A3s&0mBiWo z;iPtOG$)x(kPk>BQ!fH}3qJ`B8v6^ja~2cdGfe$Be(?}64el5SvW|(_tR)IQx+t_c z{Y*{hkz8IkK3SrK=YbD(9pE_hU^2wmTWvfLPWUxGNrYFRJMGjtT?->XU@6XSdjH^f z2T(3N)b;ia;8>^g3X)2c`u5UK`f?4gzc`r4lNHz6pwvjL1G|%}oZNwZdAIJ=v+`)O zeafo6OE`hE?r77^NEIXnG29Ryo63bZTmw!of2VDH@SengS@Gwc zxFk1uyf(rJN|z?XcsjR|-L#4CnLM}H)^l&69(euzh}3(pQQB0cVheLpwHHJz+^F4% zxZ7dbu{YL~U$vSn9yQN|Fph>=&)uloR}r0#mVIwQX1e*Vd^Gt% z+WsleE7ZN3C4;rLCC$?!@4O(c_n#TRZ(VI0-w!Bk66!Z}UlClZ-fCu)JcE@pcT-|a zMMjGCGzwaO%injA7LY1FJHdzivk01B6w~L84Gohjv&qo#)NJ=~c76+JoG?J&P)YqF z(1j2Y%&X)?4hB%%%XV?QdjgmutuNoKpkq9bmyEh(c$RwKVEh-IpPLDFh*Z>Mh{$7- zuAJkYVV;R&^aYF5^y6iVh!u~GFe7Czl85 zsk~-5jFJpDLUo9Bk2(apnDIL>I5W=O@ay`R#>?ML|eK*f4ZlzlYzoeqT1t5 zp|m$SJFp}iCx1NdRaxNi-oN$aXRcA|!vT@G=g%G_TfW)ubgdQO0(&t6!8ct}adNQK zYjk=r@E9PRSs0qv_d~N0=K&R`FV}S{lH^HfhQbISaJhBaZwv(&_C0f!kmr_p(wGZn zjbB8rYlransqKW+nn+@thH4&;cO|zO1>XwF)U10=kWct{?T@T^B{aSq94N1z;fg5a zK$tzm4f$co;i+0BR*Iq1RHW(IRYSqDm ztAJxB^1@?;5>G>z>8Uy;jzS+$?Mit%rX3=lR9os}^(Xt$hYP^8Sl<(GA>RR#wQeLFWHX&zG)f}cLU@ovym(iq3YJNUIvE~9{C&~pFeU0~t2#Jfp*KMTB5 zBPCDv5W9tqBsAY_DXP~z*V-G%Dh6DJEVBdwPV0aaF4& zZ?3l2r+hpPlGuK^9arFysiC0Ez#DB%EBhRhYsFJxI@kp)$?;oN-66NR)MAED4B}{( z>T9+8U+mtf2z_Vv1oVrjMW<|1Mm$4KG>O*~&DcXu4mN0|s_4RN>@-d^HoPY$dO!PB zWZN+3z{gx%wWxLV5c90(j!FlBEkk2Oliv43IO1HFS~PRgbZ_6uOX3N&V47fKs)iN? ziOm><=MRUL8HqNdQ15j;`;X^}*EQccrP!1y2GKovs8PAkJaL;^TnT5!w4tdE)$)SsImu*}|hL&afeqTaw=Vm@Sq1bF>x?`RR}(43{j7$isA6PA0p7=)B2Q{48dU{M@Mh zK|qcAh@B?pyz=E-&^`~6Wrti9g&NnAo!1CfT0^yn^#JfTRRldp;P*L8;^*6;>5gC| zN*kX1k-H%*F=$Woa+)ms+C-+T7hRd0J2YxBygYH@E6$DTn$PY24+9B^1C1#{;&PX! 
z6lF$&S zu@u_ZIc(o}Ve1G(o>>4RxuTA*_D(o0g9ra2)dZ!??0W2^u8ij8Ht2dGpxGIf%C+yq zCrK<)E8Aca8>Yks++Mw?ccuIX9Ej>j;mKfmi8qU)C_VV9>4w<5_>HQ6X?~m!-L0A- zfif$7LMTq)1U0PHtsNg0h`#9ydvmc2oF~%*Pno2~;ew|}TXS;N4n%f|pB*mF>@!>P znG!X-5b_ZXKYSJ8s7vmT@rSO5T@J>FFb)E9)y^p@=TO@K-)1cf}Nu6fNSMUC$u+lbS+p+*Vr)0B@qrB#P z-38qVIXP3$ohrd<&jy+Mmper@6g+IpM_fdzxoZL3`8Ke7xMNSH$qxIgGy4Q*b)oQ% zZh}2R<*cuIEF~3{={3(XEIf;63+SHJUmmT#eh4wern^@CGp8enY!ttG;Vtctx_LRC zwo$<%uYJWH6XLa$+nT0y{$HTN4NiEZ;egN@^~wL~wP$M(S$1|O(d$-r+z3B!h$Vy} zSk#()Bk)+tUWWGVy(G1)&f)WpSgj?0!V5JV^^YcvGAg?_OdwR)P>s-EW))6x)X`nip zvYAMWYAZk97u`;_wA&N!nofe)wc;&>B7A&ZY24M&Irws4-N=N29#b9Pc85_Op(gS3|zU-m;bQM#-kqmDYU&|QL;%>D3hfk(nxVF*D8 z6LWF%EkE_5^}LU-@HB$VtIKGIpDev2Kq0=BmRF{#)=ho79r`FwtzjOeBDMnO+gm9g zON08|Jn)Wy1Q*01=e|Y64cIj38Hxx zXCr*H^nkZc(7_nvj5^>f{dOBSe&d`&NaeZ&b=$g21LSSkk?M>nGYi}HdAn48JMGxJ zfB|jhsGB|wlT8e<7jC+$tB_W7`v*-weq{++1{2&2;w^c+pteOnYRnuw=h{>xwdu}T z{WY*ywX_1F4e@|vK|Z3{8Fw6>Blk?5Q}?(W!^gF5#KyIVh>Yg64h7vC`&T^!6%PoW zrsPIfb_B5-N(vndb%YY3g3C5`?s{e}!B;xiYvL}SOG}qxU=PR~Br(UZg^zUN3sj#q zhUhd*T@A=?FitLHXvjn{>3*Z!zh`<}xzqf9!b>|KK08PVhJ!TSfPOLkuAc5(P{mUd zV|onrV!Z3r5v7KX$&veDB2W~)$(Km#kdfzKYxTg{&UmTGiXEYuyh1vhIRZ!^&RSNb zYJKmW$1u*#4P+N2NcMilDb!ZUF0b-ui{off3th^vEXg%aM_qX21Q(^94ad34iM%_j zSXs!f-p^ex5OiJlYDYEnU1>BwRt{<*>t&7@I1`UBck^Jp*~~2K zxkNIE+Qn+yoWKMau16d41G)LSF`@frE1TK-84Hnd<|tK;4ZqL^*h-_Y>fHUxtR?o1E%Vy_9!0f5kP!U#$;0yxLd_X$79Z1|ymIK#`43IAK33`rF;q z8=9!XlEsu3dT)e|&XL)N8bvyG|cM=@(*_QIQ- za=O%8?yc*bOy6SfFG9k%*+%tgqjU_MwO+l=ym{lf^W{r@%F)X7CWNx{x#w?Lq&jPx zz2hgQb2cS1 zBu{-p8zYQBvTV|N9`vTuF?>eeq#MpB_f8y(l;gLquKVnC=aT0t;Hpdaan|lly3rXL zI4>1KBXteNX3o9Zj#3vL)QXC7bV9qEXu)1_3Ouz$ctPWZ4L@JKC}TdtC-QV9{Jue? 
zBAOP>_%wt`JD#F-1Ll#Xob;6Pg2fd?`sWz*9DT1AN7P6q^DuhB1YO_f)}^&H<$iUN z@t)Ngkb`WV5{GqdH6iVNs+kf)^_A7y!&T|wnsGm(&FV*V^rQBd{0Qvqb1&Pigxcrn z24*Rthc_N=_E`Dyd(8cut?qw*#Fwc+G-l>r&Bcj)u?`8f5wzlIZ@d$Dq%dQgWS+UJ zP-?(gAkeVUaMQ)$k_V&Wb#q3e+p%lY#T~iZz_E7i&H=3IofmGIu6IX7x4&3srN$40 z+2=^dh4+LH(xY!2WIURF6Uy7N$%XE7WZLg6x+ueDdcK8x)K1pzYsCGO)cjkWY@>TY zgO}_)!$hIRjbYi!WRlOF(?G$&(X7nqBi)1-DTRH77?v*U7TXL{`hdEfABRqOwQUCt z*GA!wwBCP-15=A-8&i@-D93<+uQOh~u)(r<=^e<0%w!-UAn1t|hqkL()0QU_A@4zH z$c@?r^6Ea~Mp-{}JzuoBy*uzTGuJnhM4cZsHnR6etpuQ&tX7SVCMjDcIVnmX*-a}} zf|JV0khzHt$&Abds8*SVS{1o@B(tMrz{eF(hR{Z*%~x=jN{`d-rwGUkWL>!NgO~C< zNOIEPx;>~j9V)T(*KTQK8GX|tn)x{(8_E5gS2!~BYxzXxCSV^T+-I703ILIJyH(AL z-&<~P;)~lW6Xt-Z!gArX$-{@%cEZkq<3aOsBn5l#5C`+GLL`VBN|)Ow^2l2#=p2r0 znFuKAE+D6%g%E{zf$;c>ekDGq?Bfd>@vryHy|Q2SJ5`+Zp_)@bF%xRnXLwB!r#h8L z*8XZu_~(TNSHtU3A2hBz_BQ2y+Bg_rF+)Ko%w-Imo5xe{|DbwlgO-EEqq86(zI=## zE`Yg&FgOXrw(pp-amc*TXE?fZX&;TDjWxZ9k1lepI~;L~rfY{@8RA1m=~{q#n^rm= zOmN@1Ujm#o!FUaw+`w?|0gD5 z6H?iWc~4#n<`(E}!N?*(9gF^zkVc)m=5fnOjS?R(`r>v8ZPJ|^v$d(p3!2IojPy6; zug;O#X0nbNJ8ufdvD~J|d2rR9#Nx__mf;HN2*kHie3U*M6l3(XFH<2 z_^35ZRZv>q$s4nZ^na;8z{PwUkMD)<(1>_U0(e;InD3+ zgH&20@LKGdvMzeKiBV+`<8**1Z9un9pWtiwbIpkm*+WL_5D(!JafJTmOC^cH>UHlU zxI)YI(NcbtmrR-E#lEv4lA!JLQ79{e(hNuX+;-7@)w-{{RFg_54Xvg5{+*nk@nb10 zj&rUuC*0vvi=eQnvlntR*e^I!3#8YAXdQ2gP6jhwmZ57=to{n)VMSmsJ+)4-drTBn z|HwUOthL}`gKqFpAHUZbmP52u0a@aN+rY`kf|_ghss;?*XME*HzC6+BP$X0q#z9Gp zn>Xrx=>$20YHwy&Rz5-8bxb|aXU{hXxDATCghS$o_v!K7Xc@r3~s#X}bBcAgWGA`@^sSfR&lvhQ(|AEN_a$bumtQdJNg+7jfBu66<@y#-v zSBHN*N*4M`iN0ROp<0q-$H?suJH`~g%-mmMZl!B0wiRBow~h%3Xgv}twaBz|~C z)s}~BR7la!Iv6Yd|atUEvL&Gwv?XL-)DSLD46tQ*z>RYL*Edkdi4(>L&d8D z!GsG~iUrIxjzNmNQU&yF%ahON@^(={{st5^{`EjPTH-GTAijIu@ zE~g}q29XC@;plw4B-m<2v;#oKP*VMz$fqK)3b2EhR zBb>U#Z)^+@m4O1jH{~GBQ`3EM_;=>~m*bRJ0y<;08E~zgx-sVAG-T|5p$7=LcfZX* zAJ<_iMuBd@G(!6i^_unepz!fR&=9kcMRh=?V&vG!S=Ihurhl6ljOM4Z&LH%x%kryW zx!VZiujSTRzdLJ@e+_J&8UqXTUJ^q7X`b7ZK&4I=dGt8WEZ#pKp9F%OP}k#t3ulpv zf53-hIZ)nOchKJdr`lNGG&m@n6)-T7{WXx^q9==%@`fc=jLeRLqOlc<(jRy%>rLcp 
z+Z}1*!1T46y|<`r4E!~_U!tyOX5oEZSON-_UV{~JeLCq-o{Ogq3hfs29QM7wn2Wqv8hzlVph=30#ZGD`o5iR@t*kfL59C-XmYJu4D* zuu(KwCSCuuvJAduKso+zcDtKTE(~Ped$iS+4)SABk85l>{s1o-AAxa1&>t!m&se0n zv92uvI7r-b=lhl4d-0cGl<|Qqp1n*MQTWr0c+Ue}mB-G}`h$!ad|RcAXIl%Ot(vWo zebDV{<@t3N0PsS4rlKU%Xg+{~iOj2a-fY{N*_>gC{v&H#$b69?R*+Hi)A|z2-kxu7 z@AUTT*`;DC3o|PKlZx}kTG?6Q|E*k9aIgztIFEl$z(B5u31m~YGpjdA#FnE;M5dz! zr^4wX8(%Hl`!&UX*4}pmZp{8;&p!Q|o;Yx!dQF0^#8WSWIKZ-?W+(X@;)mORWFS>X z28z)K(w2H%=mAKu@WP*@?cFJ++EQXGscU;7ZP5BZZG3>t5ctjE_4{+_>3fZ{iDr={ z0Ns!LUN#c_hb=$CW_&wkRiLz42!g>~LZXHLm1X(gEPPLZ-hHd{97y1j+#C1r#pE=H zRX0~L(2HjUz0ke?S_|J;FpTqH7?-W`V!$M;O2)hnGXd7cW9Qm(#Z?(Y5>G7sc;OGl z@ZI7-DMrjJRb-)wpajq{1Y|J8R8oYv)W(1bSOEFl%cd%nw|>KwzeL;}2Il^xNQYHA zrQgW=_^%S_wFL|U2dYmbmHld5D9-{h|65OTu;F@|7Q1SPQW{CbIP0!N%CCW1s)e^% zX^Xi#!@Bt`nO;uPrRH*e1eVSjj0w3Hg=eu5C&LfC&43|I`% zmYwsbQTlh&{|py|4r97$vxwekm+qGYUjH}uX%z& z{0S%^&jd&-6Ia=wsb#<=?VGvzm5-7gJ2T_IKJvZpgrew9hE+J$xzM{`?Ozm0H7W{gK)vUM>JIwe*OM?RR1@aLD-KLf#WBVQ5}g1Q znk(%fk2!`|1L&;T6M(b#5DRqhW`~~z!2i)!jFG4rU?b0wt3Kvh{^8W6a#Z>rBw!`E z3hQy+g?>=}uNM2#VcZO+D|{L$8mv>JAd%e*s(Cl#t3(0BRj$7(^M?UH2?CnEw*3ih z-3o3{=pQV&QL~_832JKXJfcD{qg~i&p;rUTTVP_3{pk$l{qJoeMGxh5bESWLTk?hj zh$@uBIN#qK4ZxtUB0;w+B&RipKt*CNK-^>#6z>8R=*`jv2 zF&8*2gbQ@u#KfA(py0cQTn!O}QFZy^zbF%62W5d1HJ7i8@J}L6_i3$iuZv?#~6s-$l~w+r`^HyW?T(;$PuojA3XmcaOUnTuyzL}Q`Tkwu@T@PqOt?ZHZ_84 zYxEysgI8}~1@4c;if{_opALe5Qak^ROMvnBR00RV|E1}6AVx3PACjlkgQERsm>j8~ zd0T7=Z~^;Pm%-HoPwCvIV@jIHZoLLEa8X&fz#n8f|J$ZuXbK15QisId1>n5g;&E%e zg}*$uz0li>xxfRl22yUfbuUY`@cc*kzN4(h?D}@QYGdWxNG6wQP=QTTBFSeLDKdK# zl!@<6)yafE#grtsPuias6RBW^*CzdR1K|4Zw{ zcpGqehMN(0^?Ji&z|9ieS;E|Rg1HOi5aAZ zVxa3}lR)Z&C-gSe|4}((exS+8;WZK@hsq=@%NXO2Aw~86wU8gCA`y0ALw2Te- z<%HoJq!4`7g3VzHL6>TFN(-)Nx;i&w_-O6|5RKe7@SkQF)J z`}sBFCy?~C_d56s0jrP22y5fVQ;&sK%jmvOkNuo~<=S~|5toNuKi2>?7@<@8PW->q zgNLLpATLg({woBwsI~KGPH1am8(4}~OUG*Ufs8#BxRXn8X4Ns6>s0s~_S@S0P*aLP zwL(Qw?LMIv9x^$@J23t<;7pM0bCpXz*MOz+1>oXrP`~G5d>;c=4@^S@Z($(U+y^dB 
z0ZMF;Ns_zFs=ONJNbZNe$uO6P9fAPB?tTL_(KQ<1J5+bxZHCVPC{+w}&Xy)@inyDQc+_oGI;auK70sYeDJ009UB9uS8FauRJEStLAD8eDAQ0Jq6 zB!MZ%hS~$S9k=cM3&8t+t6gU`nrws}Ep(%`n(G=Bbd` zE`aFlBMCN8-?x)##TxkNj_Rr|mWdPf96ljz(eO8i>4)ssRA}nzUa7%2uVz>Y^<)9q z4F2;jWgroF0LtR3zPE;k^?zQG--+6?Z`fm|e5dc0tOLYA%-#UZG}hWMEytV>r4RRk zQul3u}VUfa~p=IlJT0(2C%fg%Cy3=meXMzu>_ z4BI?OoFVgg{aHs4kuA*du>4&MlvlQiFt|DauCBf;Qp{zJdk4szteT@xJj1)7IAe69 z(9I%s%mQnK@-o!Sh4Sqt>%W&#Vz{P>rvZ^aQ^b+gxL7~BLZD>>#7t&4@oztP#n-?X z>Sqke=0Stsc`>E$)}S+sWRW(itKq|RQUMSSusj1|{vorw;P>iZ0}?joaD456*@nAE z$!imPA5e5!mtkd#L=x(!bm=1L0s%KvGi*T>JO&6cVxi0YRmwseT|1NxFoTBSz3YDf zOlfrO91n@iT^j#o4CvaT@o<18LaT)C>}$pMC~p8piPqPy!9pu2X%QN|hf_+<241`? z?YZBQ`SxP~z|jV@%gQ~({t*wUWj?6eLV&_Te`Sm#H5cxsho=@C5iEEzhk)gge z?(g7;kATtXhG&s1{q_-9tp-1rV?KmrNj@q9xqKavbVu$lzhJPQ6H#~KZ_fXNZ#^`8 zX=x}w4-&tS+DMn`Gzwsbau@Z7Z&U99l>!|-zwh$$Y9jXB3y@rl@)gblix7^1dYOFF z3X}#VVUm?y0#>xtGh2y(H22^5iete1;_Il$1t9#JLu5IGdb_}s;)Pn)@o?~Jo^ja~ zh`x583j_yHj){h4LC*kbN*S!j288ILZ3ZDJAvyUmlj+a-$B3TR)EUARHN#ANr+DsR z92XWbPN6DY;Jpmi_0n`=D@eNo%1w5ZAJ367fU4SRN?qW}35}EyI z8{m5J5pV%>6gC*P8F;%Goeq>JCH+bZ4j-HGQP;2Hk5qFXRK z@lHKI4WNDQb?Cm~3j$S*R(A>QLUSasV}~HPhlDPa+$5isSIa9rFl>%nHZe1SesA#e zV-{s^gHgO2BL&$vc4`6K6n#)$y|En1Ag-ZOFpL7t6c;I-acU%h`h$;MfmVZ9(dC*& zov8gbB5_^2U=UETh)lG-s8TpF*zH28?As7&}W03%L`O`(kX^~};pp@Z@!x^Z}26|tnu6=~E z))&tAcJ*_gmkDsDqPRKjGJHrj%p+0T^C;NH5jR8%-@L z-`C!ln;=9TYm(L3$Zy(TpYY`Dm;%TbDZLakg@w??wW1wK+(3WTkf?;Fs)E0k_Vd9< z?VNvz^?pcC-7n!3&GKnO{+oq2&+GzH%gnoFAn#Naj4-iL#kODd?qfEHNpeC95e%AJ z0+ypiFQ2IU?c(j#fM6pap+3v{!6AuIQ4Yb6@4rrX94GQX7h3MPc^a1ePc~Tc-m7JuE z4<|;=7^S3|b@xAS6)O|77Y&ibkSN=Hzy2jKwUYz<0(uC>tO+T^GLV4K)RdQ;=f$LE zfIMV9&(pfMLZ8%*UQta}`dpWaDe?Xi_Dv(l*d445L>!9y*f2s5CgODxGz&z##ModE zS+=g#NS(QHgCjy^Yx`OC)8SQkRd4gFj(ma{>vTuqoX?C*lsO6Fc)WEnkEqol#Gf+& zQ=x#t9yB2nOukGx=D4*uP+F>9hU(=Ney+&f);`^Hh_Za=H2D=DnXSXjNTdkw{j6WN z+kSr1IPWOL+6e+s_NOD>bK^7HdZ90W&s|~^lC;3MFcqYcQs2}GD2m=79esw&kPIP) zC>d>!ZFj_7#PJU2V-Ho{5K$f*`CS}gIY)T{+?y5$MVN|!+gShJ{a~x;KUu; 
zGC1m!x>xeKi2LP_XpEF?!(faQrPuh2?e-l=G!QcdaE4l7IUxj_^WI`2iqERhha3Zo zKGMVk2U$t4OX{(M;j0NyldNz59io9Ack}I*ADWGy8?P4C_7omjO`o@T-INm2m4F}N zqc?)J?<1TEM!2P6M@#!NQIIAUJFVgqrZ70}8Klb5MikumDMT(H`n>MT+XxgXEk^U+ zHboJ_AF{R#*{gFQx_YFh>Lf>}-7k{`1SY@IOz4tl4cG>;u!uipXBJ!xQ3 zFIPa2Rw0D9Et#XY`fg>HaN?y0g>gRA-AkIwUC>BQi6jvdxfzhJas3o3InZmwj1^bU zA>m5`o4fr&hdc$L1>2O3(BI=!9bm5u*4}A4hOa4BH|^7OzLrczOzLG7wpBExU3BOP z?apq~&YO_v%{J`mVVd8;K4-s+KaSu+3<6X_(QAev{zMa=-A}0Jm4MSv@DIG^0@;t1 z|0t4mogvzj?g~a1`Tq!@9JFWK?fw`dry>9=CCEY1R%)rI1}Ob^D7QICZ{dIeEKf}n z87xD|q!Q$*+hF)kG6T`HN1`aj)$ef>Gzs=!FF9Y&u|~N)#Z-*C#YR^Hn=9$1o?#J% z_N97Y_5erXw~&AgPzwh(Y1*=r)%CT-+UfCzriJ*3q0+DjMIi7CTK7;hom9&V$MRz) za6C{pkS<@T!9FV*{q|XdSzZ%{QOrsUap;rOGwITP^KGfJxHcBGf=_#^yJy`yk57Yt z`Qat$jY_B(F=`NT5btcm7UP?pkY*V~BGxVGZ-pOSR_Tvzkl9;3VjOjoRX&m8#^!CSfB`cYjmocUEUYvz(2(ax1 zXeGdRPB(3!mbMfpbz*1SMfFnQ_b7w0DC>;3%`6uNJu!pUgP48h)-mz!Gd2Y>cTOWZ zDs&Q*T=+i7mg2SfuK^C2ix`a<LuT#BMlX;fs)kn}$1@OK&ZlB`Y(CI_8u z?USHenh<8X+>&mONq#OK$s`ol)k__2;r|)s-M5v>(OHtPpjt43FkclK{BFks!dsi! z9p}Bf!9u12LBALf!nmFDN<<$l`kKdb7@L=L?4$!cHlT#K=2_KqLrs&giDjFa%gVM< z-n9k-5NnWr)oE^XCuLhzWz$~6B|MvB!L0sY#s2hc8)jW@;AjGh+Vn4vm@a_`uMgjG z!{=&U(}fbam+^;0(X6tVa*!%D-x+*`h&sTq_eGkM@BF&`^hr37>Cn7h(S;D!P>WI6 zz7~t>ud+=fy?9oUbGYvO)YKoYXn${X?S_+%W7gaP@bS>n%leoJ5EZUCltjq#V-yI%)I&6k^IJ!R5DbrsepjRB` zF^abB^_DDro-fbX`iZ}A^JPJxEwKb|m2d3i0|WU8XivEw6rz$oajQBSx-)j7VCpWA z2@!yoSn5Sl(jf$^x}$5Z9wr>l38B(<;@rNAiNiYGK2w&KqyhX5TVra|z5`!#9&m8BdWBdfDTEWex(GPGnd# zD%GZCcvrS4LCk|*ApNM2KL3=18wi#t^l%at zc%ns^B}w4#8n{2w+g8K(SCbMmK+)rumfxe8paCeCc)|Uo5QDB{!SIv$?GhO};ie8` zaq)%j;2_GAt!*?_8>BWD_+>7XbL?P${^=rRaa64d^6A>3PCsK-%s2KG=F|xwaCP@# zz-}gX7me7(;=>4S*po@5r7xuASON-c(y8{*^M>?ned|RssStsngnqt*iI)kCii%SOUB4mKPD*MG7NhQSlU|X87K{3K0{JOak(wm$(T? 
z+N2Tb*WBZ@>A|YADn7ASe3G>pW9zed4kZ0bhr+i*nr=fRrcu~RGp1z7H= zFFW%q%W&^8FFzCaI5C<;dA80S*GF}e^GP?Yb4(HsNfoxUb|^EgGIdA78_13|#m>Md zS)e-PI&XPrH;fk!_YQ(OXKBOoK1oJ_4mA2#2I7b$0XuYTY~x58)7|>ov54OA^F&T8 zjo@Io)wk!)`ytd(<{`Rs9vBH!8;`*2q^&Qw?RL|8^@q`k<{XE~;Bs8IHHA=Iw5=PX zy6WfNwDt4B)1ZFfnZK;cn-e}o#M*6@{OT7Pvhqu3aYPhm?3RJH z^9R(07uCBGD|x{fGi`b*TM#VeU zdI|L}#2V;q9Zqqhx|sLR?*!N*rI*XppmUTYojni##%)?305{=5|fQ#AC(OcGj z0m_BM>Iy62gquF$;2jsiPts>$_#$UU&Sk7*M4yC8nv}pz6J?(Ey}FR5-wu(#DR=Bq zPrU?dK<{yG17>@?nQnv5Re|i`z$LNAc%2_>j}B-~*-jtzw7jPrz;^AW&G>TF8dwzI zhWv>4s;7F1y^R1U%fz0^ke%2>@n;zy@XI@aN_su#x1G5vf#W(g9}N~`8&rc|N(M5- zcH8S={Sh<;DL<3i79m!LH8yirDng8X1cC5a?t>b48yCeVu%<-Lf=MPfJ5W(Dq8Y4iEcG{^$Ex)h$0*`GdxVJB$=^6pp+-k zw&8|h=eN!3*-dQBCwROFZ=fo0+U8{Ho`Yq1Ok*992O<(>_2h?J@a~D@0Akf6*2F*abrDT~^@inCS{<@d_gG3|!}OGM3Te*MABwNIO4!K9-QW`h#d`d|i+ur6 zY8+BEkYLg~!k^Kw_9jn<7wbjW(7;O@8BLA`^0IZ9@f9%2XXJCiMc%h%B+`RuBnl=7 z@_4x~@ZMs>BKi?A38JYWp+}v)0jq5RMp1r$RG8nKv z$#$w~UZJNg(xdvZ3L0NTy!o@yF3v#r7(Cdmrg-C@N(WjX`AIx#37o z`I#p6WmvOAweg{8OcxM2V-Fw>@E?^7=)7fV>%$-kKDg8>eE9GODS$74@Vz5;bjXl9 z{05q)J0N$VPeo&bG>DhwL&Wyr1I1MoTtHABL_oBHv zybFxI1Pv1|X?e4tt0XYa_Zj=wDF-+F_XY)eA)aI>GlMcAnj#syyP3W|in*d4<3PhI zAM+F9ObiHSD}iizy&L7$Lyy|s9OpU6Tuky&E8ePbD%G!zWU6{|--M0G%l#_r88 zk+&2%!g|Fo=-02S{vB7z2*a8Hl29>1D+8YmHFW4{E!M-E=|S7r5+786#s6&UdH^`@ z^NegN-uJ>+zR4)qD4%80=D{~)yV}mQ;8FfPL~tEb)oR%!jLkG)L}FKr(J<`T7W2lB zx-E6sth~Si{X3#k(^*}&ej5$}l13PP5Y(}@wB&+oiOAumOjG#Ss@^eO265FI1mFLM z>b)cNuv@PvsINvgYt@pN#VYX!*~uA--P_*RKGY%F0b(ZA3RAGLgl!y=CHJk+tw}IO;PQALb?jYIIu5!zL@c3uT7OPHH{j6hj7qlXal^ zD4OuhlPd>2I$*(r{yYCxm+kF$->Ht87rsyDC0o)zJMz)xRgfk`e6Y<{8(X%V;l>;_ zpe_HnsK0g%dh^ckWV*z*WzyCSg1nLhP5O{Kaf^^AD-^nyUzOelC*6OXX?94^A%DnX2i?-jXF`spUBzzdsx~+nieDx zd(54-OLA5q6E0Lieb4ek*b|;)4_#PWOQbniw@RSoB=!{oq!Gtfchf&{U2xPKJ(uEu z8P5%pri71V?taIbb*V)@}F}4i_!F(z|*Cz6$HmD=Nmx`3uI+C71kS^ExY1?_kRxod6b@T@1tRg9i z9n_Ec%`b6+Jnx%c(_laa1I~Jf*F2E(x1p(}d5NG?&}S z`(|56oK#Fo+!H|2T1=sRzaX`yYZ#4>H+1d^-~SWA2CDd^QE*j{9jQ9e9M_YSmO<DdkS)rPh~Mt`m0FUtC#g#c 
zZDY?AWF2~$phNo*$t~pL`^dNwzfLGhdSAIj))s=8!gY9y}YuWtn2;jBImA80~esz`gT~Dw;h!KBjeTkb=eb`%9cO z!yUo`xOw18ox^X1(oj03dvqzl37`1Gc|hxB?6*-i|AY|hWB}&>=Dk9RDk3-HTLNPT z!MQyRzz>=pKTNKDNxnfIwj61lG;lz-&^Lx6fj${3z*v=ihk#>X4CG#`(z2$9^G*MI z8=<`b0PPXTeiu3%)D%#KgVe6O10h6z3tT2we$iVpp>-bGNmAH=l~}DhHE7F`*f?Oj znu3uL=sgf`L!qeB@HRXIGLX^oeKx;M@Xmii8yr7@8=&xemE2@)NankdgqB{2d|9HJEzyzmH>dg_vw=akZ$JoN}(pRpaf-a zK6FMm=;J1Vj(4?mL*%CLr1GmzU>g6!s`y_(kt`s%pg`}er@$}}hg$KBH@^kMD0um; ztwl^sk#6q|+-}F0{)No_y94?B0Z5`n9&I2x3I@4cEGK~AfbhB@pa|za!`%NrZ14Vg znjc+v060kWBD~j%2i+5c&w>YS*TwFBKJSRzEO?K)C&#aj2jv8=pMJVaXLN}u^R zkQ)WsqV*x9_<@zo%$;r7th8N42Zk*q4uy0Bo1UBbDDF zz-3+a|7A!%;sLiUF27L5LIjXAPM_&fv77)RZIbiEk{qn%$^Ux?u>QsRcrWBY^*y5F z`Osmu_=u4+4L~|6hk3UDje}mL0RY0(=V`a{6rcf1oFO03w*tt4NKb*g!0s^>?x;Uo z{4Y%GUpEh6y(U|M5#`Zn_fTO*$tE?P4=@6i$KHr47Kz$lRMag~Q(ph)!2bJ-Tk^oO z^}1JTDxUxl18o9ihkrO5IFd`KrvO)c*)p5`-_8UbyB?j<4;6uz6C%ENA?ELm9m;{# zw5Ye`e}J|B=tVz6(Oi*-;!~aGE?5*)j3fZJ%MgR!g^%20@gp@+|7A3YzCaP=%#MTB z#Gob|V+TN-<7P@Ot`6*wMTna~q@MQgf%dO6{Zlzo*d8vpua*j<*r7s-Aoqjr*aA4d_!4)*H<|s- z<8{GNe&ZgD5;$3$Uv<{dG6~-v)%H5?TV4WH;!|zB<(<-TL!_XA5}qr?qdG)zcVp)~ z?SPiy?QcFPENt`^QO|9~C0C`~MJ~$FoKJU{3pDn11 zl{}Oin`G9?v4R^1xZEpXLoUvCpyhq^_ebT~nrUQ=${Lm|CGZ<07Z4$U7-qkuv_cMm zb&i3Ls4J}l?mr&@p`6V#IHei5T~MbhC?%2)(8eua)Sh`!?4WX*D=wJTl3?>jps#)7 z$ZeAfN5cVwNWsErqVz2)t*iChEgQNX>HyKQA|2ct|JI{{L?N=*B)5*_x+f6d9)lCp zk#tl26Vo0+t;W{+3i{K6-2Y_>_CG_zFH|Y(5R7H_GW}oPYTzNz3Hw!`$a}CTJo)zv z{TFqLC@jWaOBKzh|Heh0`2yQWrr}D>_l2{_jssgrDu19Gg&E}=(9qQDNzv5TpQ$h& z(>|Yhcg+7g0BICcN)!ecJ2sy3D+bNL=~JnpkO$dmJJLs!->diO#U~X92M06HEw*cb z;W0rG32L%&v7_Es0eRCd0uJ~HnE&!8EZxuKnTe#lmO#GGCo&fI=5$S@*&4-cq6?Uz zwQnM;@CaV@f8$jCK&aK>)jTpbR-Ol#bSbtv?T+tMs?1aU`;ZK{@L?-epE(odez!yt za`zi+arKpdzGJm6*F)!K!A*R}Y=;UQOZ1#`veC|Ppms6Oi$gTi{MqEb32z?xU&qO+ zBV=&xQMJevH4}gup=tk8Y=)$>vvWaT`DUx6cZ1Mh;y2v?jgWhI0JjqW&W2?q%5Z^e zG=d%;xKuh+t%!WC>=nRD?;rp8=l}onPgnnhXtgJcc0_*UOFwj@1cU{%Da%!KN7 z72_oGq|6k&RP^S%adGd?DwYVi-|btlT-(qs|CUg~b}mGvI5o`sd9FJ=tbIJP9jx=N z 
zD@-O$z8e3p2cxV_pjCqXtW0t7k$wYY%nhSWEl7=u>53nPNrWf{Z?)m>x98%ErGq@k zv>~wJlwDXAjgh2OH?6d&*CbiQOmR=jKI4PDu^EJ1i7YA2FJ?|f@#}(2U3ix~X{K(P zt|J>RX&@3?hN?IW+zDs}9uQK5SDJa(oNX65cHf9Vnzy3DhpX0dqxm)U?YKvmmSa&U zvXAzrrX24&Z&zDf^#ngOY?%!&|KJyydhL10x~tRZdJ?aeX0^;?iSW{S2X00ewy!7& zdQcLaYPMaO-$}TeCLrD5^9!$ZZ6*z*xUB1ngo}P6SZ52^eUunMN1TDH|8-txLO`n&d8#<}Ck!4WoxSP~J3n1V! zf6#7s4Ap(VL(J>DiLvmdkXwJOW$bz56HCT%f$z(ON3d<2d&_IJP8YPL zE9cbf_l(-EV+Xjlt9ME(ZAh7|(#N-s3Js76jdBC#pA+EIxS*ET{I=m015FAChhO5$ zbQ&fVi0|8Ud38^6FB6g$6ISH@Z6TNNrc(%&pihf-A8~cj>0{5HRgvcbny>Ui_W5a5bK+m&o zgq1G8SNe}P$NHJV-g*-m&OLPfqPlNMQ!(mJfV0b97(Ej-FE?aPSJH#`_*XIa!e|HleH3Vl~F`de+K_>e8R;iVi*l&R5 zMyu?K$FGr&Y)%u&1umem*!LmdGH>vz=iz9N_8>7{U!iA!l!TObV()LGMGaa6* ztUm<|!sc8n?iRv z0iVol5acIpuPGL-x3-I>p`-9#b{#WNkXBOZug(MV^hc|hXHX+KV;k_+MGa8%&iGt4 zUpWLLJqOSS*ul~0l0_eW5;ygm1m^}Ah!bb$?eJVvFX-cw?nH3U^Ri4KnBs`vEQF$y zkq{;(vSOc7k-1>>sEVYYHLeY>FLX608RDH%864y!RdzU@unA3bvX}`NG_>W+-}e)b zlX{jp0hdZRQPH8pe%C2;jOyNYVqFvJ)Ck#x#52agFj=}I{mIN~al>jlJBAZy^Jb%I zaz!wAPyCi0PW)u!M~uR!b|<<=Dad3WcTDlQ^iqB&{1yAy^~>*GN7+A<;25yz2ApwA zmer9Z3O-6pfi15exkel7&zsar$7MuFuK@BR$`eBr_K%Od7(0oJFLTUEE$k;tOxnJX z_jQg#g15_G8$I>K=Sdqut2B)s`y_Vp#=*tZ8Lp-xP2-2y1j+MdI&AGb6CD*unQL2g z=$^yxewnb}UZ(mwrJi~fESs!Z#SLiJKcU}BbY6OSqv@RPp~+hZxD5v}4mHOBlb~f5+>4!D~CyZ}ZHVTwOO_RX6|quzzejJsGUO71gdc zXZa3tAywOEyuZAw_yf}_%&KKa*jD|8S?`ck(N!OPCeluolAy={@>OM)KprsO^eel! 
zyZ?CH17V0cfG{;V5|%}tok}tuwymx4?fa6T3e506lIoYuAX=Q6XH}v_ zbu_^a;6;C7hOE>Ov8uuww3}@HZz*x!yzT0 z`R=8G)0@ZMY};wgmk;ypU;3~N3w%n+ka|=%qR8z~M#8^FyY^&?hR-vY9FjurqU{{x zI=$zQUCDYsT`nJI0`b$f*|$F%s(vza41OI)^x3IdGF6`X>*u%WqMf{o>(}<*zCD|S zbWu0>pEcOWpH+R!S<#inJ=lujQD4XOC=w+Pm5w_G{krj#ew>yb_M$$bv45iSEs)7w`KAYn-jbQ;h{4SY9^X{V8x9comj+tPw0L2J!Sq=bbbL z!*0CWc(oyF{FPE>k`JVm_tC|XTW;4;1bG3a*fEahtcRHStOvP973Jza8l@0%7p0}+ z1GQ$svt@C_31(xkvWAi2L3dUBF-g3B?02>*)6B+p&sAO}u|59cna?N-*NX%CwKh4& zxogc})<b5cv4wtfs5+q{wy@l3tY$e?IRJoo(!96rMz#d zlvoHE36rW_xe)o%sz32EYBQY`nacI#d(tR|?$M*#(55U?dNS|;)41+{+b}_RTfLCo zlAgSkvKfOD$0j4Z{(i#p-C>Kxnh?FjFNH5!Vdu5g>=$axpBUvL@=rToR*4gC=CE&g zum7s-cqrkeg8Gc~E%B6E_Yi;%iVI_Lp?U|~{ptLn>)F6}2b5buS+h^hhsDxEK(?nf z+)wFY@yEHnh3)|D9thPP+C6VS6-GRKR>gt;WL`K0o`4-G4Q$F1@FH6Kp+%M*|A)LC z_J$rswv)jjJYkjjNng}W3`S;tMQl8q`AO59Z4_^BcNb{@MGTmE(P5WokRG~6;e0ff zgjCGG=D%S4M%nhIq1Y30=uY=3K>lW^AqJr~&nLPwe23A4y()(?+vRR43n`*`EOu*r z(T1yO^KfZ%bq`gVksov$f9?*=i?P0+5FxLp=hgI>*ZQS__W@sCrQcxYHMl6ADPnci zA-D8}^SK zuo4*OlSvWp!py(C0KkEfr=?Ek)6_91#aAM*a`rqMix5%ydG@y!d(^8%v(>kAww2*B zO{C+X8JlKbgYN>Q&ZWzb7pFq5f|u#n5pp9h$@_Q{M8_>;FL|YNO;{qDBf`Ufj3_@^ zCRJ}na!Gcd%5Oj@hgeCWw^ zj9fZCYP>{`wsg)Yf72%19QnE}dCFmuFd2(iEvU3bq3Q0|r5DU|YS|W=kG<#ZoN4>> zRYR5cAG=He_@ggRXC=h9-6Hv=+d(XvcC}=-`)zu>2V>!FD#iEn8#y%Li(u)|zoO%~ zlGr(SzKx1h0&3KRG~#F<^hjO^Jq)V{c?T$Hy~8S(pfe$~C0MGnRo zcyE|znDo!dUHaVOqLwnDBGQMwTpwzCxsHcEu*gqbaF=71t};Jblw0HRFaUO%3&o62 z+IOYj-hka9jBp!zmUFx-+C9s<$S)*7el(Mywug)okhe8{;HV65` zj4YR1lgju+gH=B=X+&bHgQ`0?+- zvPKwEB$&K)zP~UhGF*&y(ve5w?;wwguAjJ`YLI-qpgl89kcx`N5`n#AXGOGx&U8$^)jvoUY zvGb~PRw>)GU9O%#2BtWjO-npEtbdPu=CGh7swqqTI|a9|&&-_7udzAXSz+r7i-Ilo zl=C>^7^jcZ=6&Y^q#%E*`W_1(5ouV$dYVL-RrMJcAK5X5d$VLXcszE4BZ97nTAwGI zEiol=KQ39qDn322&*HwGhvkheK={^(s-xPBK4e;4P^DEAmt_*D`jAYrp`RV|jfD9O zw9gVCbD++;OHYz(oTI$QeXXS=SFmNaT&Zhvx{S@}?Xu@qyA$X?J_i;6ry5HT0*XhR zccNP%$T5`u3z<1Sk!H_c?JA6KG3~aT6))J^x*nxwlY(l9Q}FT?O~f`do+f;yrK>>1 zma{)dX^mxv-eTag8KNDnB*ez%}1hktDWYyE~7gO>uCjF%OR?q}ohLG!Di 
zh|5p%rA0XB$Q21NJ1qZ)cN>BmBpW`)@!yP$EGdBGnG5mpWg}@m|21E5FPmAy=*08% z?YS^~3{mwL+_2Q6Z|@GJEXp5*_1hhPmUeu2h4#_r@VEJc=Si0ARB`LrhH$3umT+75 zAwjB(2p|<;*W3Qwpqp)Bxz_pgwb8lTx$#n45N7^EK^vZa^tH#wo-sEE;6e37dYm#f z?n?DsjT_9dru}Kxt+#K{{bdwY>DlWvzBmtvc%2 zn@_*F(u=%eS0vPhSWQ{F%>QC43AkJ(7(WHdo)=>frOWRRlezML!Ubet0`?>HmY*gW z@S1`wejxT!3C1N;EhzV>&Xs65ee@QPu)E4uh<}95+(V{HdxV9U$|>(vMq~3MN29de zxMF=lQd!u62ckm!@LaaZa0H*MB{EDJT=6Hjc@venWeM!>>lnK6QX0 zfGbQzy4GO@I~19q{`2Sf7|DX1YJEkIlVQnMzL0nE+~2h0$9j39Ez1t$?Ev)==m zyQCV`YLwoElOt7tt=@i&r?X#3AH{&M9YJ`*@8@W- z{Yi=R*5?~@PejlGu6X?fvG=U7Tka2PFX}uwepq^XWEaH`g|zd{Vi@y{K)K}x*9T*B z`V+GrC_?d7pjx2%&pb5$K#}|}pS1@{?h@^5Ck@}rfEZwCdpWh|Eg zjiPxw8xG9A%afb46xOdc3fe?)wnzCA;Yf{(|CcEy*TZqAXRhk<3Y)zyxliVD-7h?! z#};3!{6YOwG)2-IB)97p$lY{Jpc*cMMUz$Wot)T(fKNGm`A&&X_VP_pN5$jNM9Y`- z8y8cNwxI!CM3*0mr04OE)oa?e#8$m25ti+wI>}6>^3aNt}HVWE=1 zQ&=->y6h3PfnVLLALdCka2Z~sx5&R=3+Pw} zi)aL@Tkn5gM|*5VP8ng5X=43(KO(gb~5s$-|uq|4$X^1@+?9yxvJ zopi4gSU2>2?4F<@WM*!G^WoXbdsyTurIJjftNh1+>O|+&?W#N_x2}N`Va3w1PqkIm z{kZL={`h53USI9NoxT;X^{yqd&^GJJb6%nM>h~cCL(HtruQ?aK8_vdOi{~~@>!~z* z?m((0fcO#bv@3S4d*FZL!>l zY>1eGdjy)Klf=|B3CoKO#_A8Pbw6zKDUShnfarq2 zDotWPR(i*93lLEPetx%R`{1rGUsDO%nhci9wPi=HcIup;!XC%tyRiL=C}KRqX(r^`Hs`G=`@N1*(P4nnY2WF}3bBE18* zJ5#RVXKQs@PxF{BppE7wgZBCUC?38emEs5gnV@(}NoW6jOEKbh%qK%T;-+=%I|0OQ z@cEBn_RI^0tfuQNcbtGbth!*W54?M@#)~PXC;@&*x-@DJ9AU4!-v?r%ZsAE+TYc{O zRbA=mzFp$&GcH*M$hixal4Pba3F4J(!-BxzUb1>@3zz&_I5wP-nSgm%UQsv7L1rD< zv)yIqLO?pogrm&#rkx~R_EP?L>0j3rnY8PgSL`y2+C{2TFI4-kU^SxS(%lZv8xZef zQe*o9`pKTB!Hqxp>(4(K?Ia#fvYud&LBEDuy&CuL@%z|CmvLX)xVab#)p)9B|C#u+ zqyPGdY%|4`f3WZ~%TL~JX5V5%i=W*5Eq^HpkfuRImd}w+%CRRI)m`E`e1MSGSVnJi ztNP8-smsro&EdCwnkj#tUEd!4Y)OY8;*LtIu)*Qb+X|a2~YLSYlhe<|!Mcs%r=mf47Cp8ZG!CZ&q7{CK&gasdBJwCHN zQFu`uQCd@x(CD$I(VQ`S?z9#685pU{biIDQC71KXs~VyvK~Y)QCgq*pxrcqYjkRy- za!*LKAK^Aj^Fg&UEOzqI;osoi-#UZ)^KruQVXv~&wZi+c$Sf=%t10skl@s;I{3Cxf zhkn{KZY(z=H1lUyS6<}*2@Yoarbwx=hE9N4mU>B1$Ku^09{b>v{)hi}8~Gb;E=Jxb zDRKIZN^qe#H6Ts-0y>a zT@YZTQa$@ef47ey6zMn!kAEFfnXt>S4Z?QFsq4 
zZ6{)x8SRjFvvUtIw0vDj%hQVlVXQv(mwZs6TA`1LeT!W}Kc7l7ZGVnc>aI7~B!+=A z-XvI=kRkpAAHG}27Hpuf!0Uc#pWf`AGY}S^EbWsQofQkb;0!&JwyO$WCwI|t^_;A1 zs*41S5k8DfY1~{P0VI*HUHkSh7~?7@52pC~pK$Gi>g6sGM49L)Rx9MsIMm!LU>y?1 zjMi;S_yHJ!jVXniJ`k=}gL=5GiSn^4DdCi+O|LEJ3--C!yJu~LSuoquLqdByKF*xX z6h<2mEkD+H?QOaV!z5q`#)gnCisKm_K)r9D1{encmynm^LQt(&bIx;Yuzkb}&>`tK zH4|CtAK*?N#t*mvTKDCT!BnVfK^6z7laqAt2JdAWaz8qqpzIWUSewq4K1q%xY~Y4-W9Wih2y`W+eusPKf!NN=C|Y@A;ri(Rq;m*eP% zh^i<^@`Ub5+nm*l?k(OA`PakjSSR>5_&J3VZ})1F=J}+fx&-)Q*UmvXdCSq>?rAeG zCoE}{Y3u<5E)9aXmt2Z3zbfAb{`t`nS-jt?z%LtcE~Os8S-(4nkOgl-vYDvlCDD&? zHLujaPyf4nVcuC_HwL0t_$bJ=8V~^lM~V11?AGaCg0H&IGo5-SCCg|7I`kTAc~fG7 zgWs+yn<_k&jLHTMBF6 zma>l1o%wsCyf%!v9;}w(j+2<7I18Lqwwpg0)H|4vqHUOYb9uFUSzW*_ zD%iZU6VjZ8%&nt>q^O4xE^6h6%#Q>(4BFp*K!{L4*d_wQ3)bj%w2PyWHG5ezQm0c7 zVah_hf4alQeAY$16DBIC_j(tT66hvV4soc>ak(q8%eM<3*NyervcX(M)Ax<EB}_oaSfdqPqLU_vM6;`~9prLhpL9Fbf*~ zTe-MBr+0#duL zONgI?S|D6dTynyiw!ZNh`DKP)pM&DqmV33V|4`*fM+U$ub=otuMaV~8j^N^V;MO0z zwD*vOx=EnJeJ*@fZIi{bIW=Oh+saQ9@Pv(!yRqrKS_qqH@lm`aa`Xzu?qg&e#*#^{ zGcODXUc*^CHX=9X;fLLmAvj*@Hv1^Vg8loY?<__QScZ+Jd^mcy7Tr(i>28u#v-0s~ z`t*{XZ;r)g{R0@x{{i~({r6=o)11L8bm(_*1%cOMb1IA>y=?;oI8BW zQTnmfgdLl1&PzAOt~}x^od+Lx#}<)s@gQ%O_1rd8?6AFCzyWXna{`gu`dfGGQD+%@ zGGa2+8~yE{e!b!{<3XX=5>LVp-Cwirgiz=I@T|>!kMf{-EQM;+S2tLPJu6#Y)+RpA zxI71!uxLB*U(a2y{WaLRj(=%y)3japzK2NxKa4kpYBU3A*`QKk+nR3nOW$L!oh^un zZyu$0<0k3?#8mvw+*2is`5DMt6@Vc!<$x)S5Y$VRWK{026Snr}r(?7oP*Y4Ud(|-r z7JMxfZ%BEHPB9m7**%oSDn^`^%m^?ZMmF(zhg_6uDDmr?%+D*A*6e#X@**FY)EIOl zbqHnp=ud^)`5W@3i|;!@;YeDhjj58oTBLSO#|QjxJ`s%pE4m{IlC1NN{zP=~3ssb*|)dfd?!8aG|wqTG0{dsuI>3HKx-96sD)3GVGoA zK_?$z$qn#ObCSgwTle!J%1WdrVHe;!c-VI-_Nd7s={8zImjT%F?HfYo5MJ2ac|!>V zU7d^XMRmXT?L%F`4tU#cEt22~U9ptEHaFnP8%43~v$hx_+8>#vTuzC5BsjJ5sh{5S zy(8cr5A=$R0(=cqbCQ9x5Di>nfjRJt)!e5g?>K9L_W8jWWap1=Kl0=qts>pq=?mLp zHXn<%ihX^hnzce-hxS7N$#+7xVNs$6ISDbM7VYG`|4f6u%LYK_N}QG;8oIb)8~QLWHKxESBz1l}&{6SE7%)pcYaaB~S#J&@W63U#jf{xKtYvD-u9|7~ zPMOASzKwj{7jwAYNiqT#JUKki^0qD*2~4X=&#n|%yMA7SA1D-@+akKsV^2Hp``qbu 
z7ZQhnLN2y0-=55nxf><5ZM3YxYML3KK)h^p(^~{h6Ml5(g}!L`K%A(+Ij=3gS|DydmhPng2(Mm&gdFkUcGl zshy%FG2euTW9bw0G%SKhV4u==9$o~7Nb1_QB*&+f3`_sq@23$bjuo{pvQ0>QW+uUN zryJBERX#CIFwTQHNLq;bflVTq1uR51dtoRXK*ibmPFGe!ZY1nyb8idVOs2XpPV(2- zq*T60OM;Zn_b3~=MiXZ&)~L|fCV|YTUo>C>aUl2Oay^7%R1cTtQ%<&rWVkrrxP9U3 z&aG(6?Sqd#$>od?aM@^o*8|N7aL9Jty2;6o^KHc3RNcuZv*u~z=wDDJ+=&xGya#>G zEbIO3pgOK=t*|;9W2v=L!Fr0X6_&fkV%aax+bh)`Tb!500&r^qoAyAo;$~L1A(yj#_g32u)t8yNNsh&cQsOTcaKN zm3N#AVzC%7!*c*+3Xe<;7)70%%kZM;CUgixq~SWP>?@7(oH|5E3Y0=EkWw%*ACjpC zuH3`TQHm95e#q!IYoIWk&j5fi)YP#7=`!%o~Ic4ud+$_1;H7Seb8w z(0)|QeVL9kw7#sP`1U=PG=H>hb_*viqvBab$g3W`M@eyg7GxZViMaonp;tVc0elM*RiT1C z{p+h>8pZ~c6(8FpG^8HuIoSNSZ!mnYoBeAq?I!{VN@1DBb3yk)pZYWrVoijr<@MH~ zN*eu*3ftbs%-&jY$s_Ol!QMsY^GHX)8RMS&831Eq_Bf?`LuL_Kty1wz2TO7*`jJ~O z_Dp3M+k=8JvRNTuqL=UybWRo+Ik?e)F(#+HWkgvRjT+3SSX@?wpY;3ePL`MW_;wPb zvnYH*P;hLzX2CkYreS@g-s&d?ZneeTZAr9l|B^zgiFThY;FtnAR?Pj*hx`N4n>0Yk zKZb*OzeiA+nb~-$sn-3kIzJmn%58`BT*x;vk4jBIrad_J&WU~Y{#=3^`{CSXrIq5H z^v~hMvNJa}mIeab3{yxuF|9=0#&5@@!rMBTlAcve6Y*fMKjqw^%ahqCc86Dg6`CW&PGNa^#TMs7&w!eY=x+*J>CNOreF zGu*=MZ~`{l``rZ_I*Q+K{;YOTc!4VT38DYsogJ9bTv+cB2SAJrLCP|4n>A1-9>I}V zo8O}hC%)vE(=yd1$)o*AdUO{`%z&Ut@C7N0M%QGs@Q9BqiizN+i5`%>DC6AFAKQm( z5(Pa2#(X(zS#i?>*Ko)Ixu{0!r0>z-xZ6!95px{G{6{I%_`Y+d91V;-fZ=c|NLYpJ z37Pf%YTi@CyR?(2p)RVYqlwi;Jf=n_hFl>#Uj;z+x#~WLhWzyY0JYTFw_NcKb%8)i zrWW04-fwXyF(L9OwHl8|U#obSdYyCg>~P|TP`q_vod`cw4g)|QG(H8e^LQyx87+7% zrr3AID##+UPMToQ`;a8)ABPhNvL?;fo~TPHRTa^2e8W>HP&clROJWXO4<|xN&dO7d z<1*#tXFPXKH~c8Wgfkp!F2AhU=3-5p5#qyx0~hd-Xb>dBLr!z)lE!1@@U>gVw;IE zk(b}#VU_Q5>X8xls!%2)g(Hvj27S*pItC0BJN0_^L*CZP=i{hQn*8*;(8a1GKF;s@ z&h28tPZ`sPemi))p_JBA=U9USI@eXLL=(e|P^1Z$2zlq7Ks)6B<2XJ*F*Fbsw$^fXR5!(xUvSi^yzW}h?*5`OL z__3Yn2V<4;(FBk8dC{?#^rn7bA$GnRT~!Rd;vX!b!h)yDh&K0bTfG!A{_pKm2a$Cp zFC6HxLpG=zMyH)ed2w)*uELEzpW`XEH?vR%HAnQ>gDQIdC3g=W6|BQg4qr_iYBFUe z$<1U%ir66GZ`GKM1Wzb0#+lRV%G==n>Z>plF~kR7nG}8q{6N+q$9M5%QdVF)0W!6Q zY4SmiC|X*Z5yRGy-s(7DGtw1Ji8>TIpstSFB#7@^un4QzR7LTV)ZRPVDyP{r=mdv> 
zH{OQK)9ZZ%?Fnw9DPR~f(a^2L0~}E0`i&!CH$)KugR<##;Ts>Mh{a5z10)Ha0@nYR z+2bcmO@9IzW#YDuiVq6egjUFL8rR7r&Mi(Ri=^k3^QP>o_E9{Kc&|1Jm@pp-PqzPC z(LZZMvX6LVG9S!p+6|I6aYMwpA-54;mhAv#Bgn?r1Kv`AeJu&&B&4r`Js>j;U+f0E4g;x?(tGu%o2wh*icvboB=~>k9); zwCxFsB|b%TU3p8ul(tvVy_Ixec2JzJ;CG;N9pa@AZe*DWKd?fW+D4)^kh%l18c<%H zKRvuKcSLhb9Dvu?+_rWH=~CYkZ3=vV$!wOx$zuMl3}+zaoqDK>J32Qy@y}+PpT}YD zQN-0{!C1JYHDFJ?{jfxmz#I_n@H_ESd&DS^7@%14!FZ8t%0;X^3{%KX6~ia`gP8T! zqrRUf5AKb=dT!1}rH};DW2G>+4i9Ir3qvnL52%W2Z z$G2^tAm(g9Y5TIqmc#36z2q$lOS5eYuHz}?(;aGETBKTpB~ zamxG}BBx%DB+oZC(#GSqTles9rTUt=-{qH37F|n-LRbuoMzsl;EL~C}IQmQ`*ODLg zXUgz-qzQ9SnTEZsCSSzl$>TmCbVmjILo++>$~1Z1J9yXC$!5RV27LANq3wUVT?1ae z9&)My&gfgT9NCIE#$jhE)v~?)QgfqDOPkIM-~WkO{KY92{X{etQ(pNS zbig>=2(-_CF&wzK-Gbl{DE!+hJQ=BSrVeZb={O#(%>M0Ln$V zilD^YR09WYGp9xX$T_)MNvc^m&Om^Z6`a!e@olF~t`(|`tQMy-^_$(e5PXr!6Qawg zatokk?NILYNT?oZ9QkL*`*%tvcN-Uh*pd34Y~cgau%A1Cu+&M(zW|Y7F1gIY>!->z z@8>+U?{hp)Kk3~>;<9-WfQn(6IX)7#k>1QV+WZIN@Neu1#h4#%vQ)MauP_Tsw_1SI zmb6gcw01@R|EmVwIHl*HdF5s#->?&ye>V01rg*4v;eOXDv$kti{j1OY+b90xkIHU; z%WN>!W##^R3;yRnQ1tw*Td-Qi%>TPKf5{u9zxAhmqG&{C|9=xjz@(IGj8j_!NdnbS zs9L@PXI7Oq6~KW0uj0VauJy4uYBQYG{TC}zY6hT6=437TS-h?;fWJioKHIn7Vw0QU zE588~=nli2+^npBRC_1WKl`(1>qbvV%9W~W^sY*oVqb&BOu0j5WmeI@ZmeFKomT7RWdA?Q^bNjY$@#Yr&At9Rcn<8X#aaLF?@^dVKe!0V`O}H< zf7bTjf1~~k7k*P==eYaZemI56mDuOb;2=u~k;r`EyNL}T2cd9-saV9Bw}Vry(LMS0 zJ9Wd?6JIxD<{uRnbb;F)%Q;Ku0s_NhckrN1{)@Z-q@kz4@ za!wnabl0E2qiChppl2C4WtLA5sk$D}Y_`kW>(1sV@}xWW8U1YAp2Bx_-fsLW9*VeB z{hc&HgJy#TaIAL=Ya{5+I5_-3?E9pUonZ!02kjR?rPFLy+ui$v|Iv07L*0uFmeTb; z70Wo(@8K+FD-U>Fj@1cNGzejh{jg(O9QRsXP8M76I4m&inCMgqKrlwWQ^m3o1w0ji z1Td0VVD-b;NHzy7d1Y$&#vtUX-;Ie<_=hr^C$QLXzucy$xcr7$VGstt2qZ7*lzh#) zBr+k>F}kLR?6xW_(I2UX0Zo^y4BzZe^fXv6HKJu!W#t1EOQe9VX}=7&oGw++;QSeF z4Kg;#RjE+*k+%bOX$Cb%StC;3!ZnFS6CAX2& z@7H4fYuNnzZ1oS|%0GY==r94SCOiM$L;nB8E}4Nsd`|P>{~9p=i|!H>`uofhO8f5c zpH-;g8fWSm%RT9bm6?`nAOO^<}48HmOBOoXFkSgx{Oj^&h2% z68(3EQ`(l7msza?3B3rRfpM5ofLkB)zkx zsZIynO|Ap$|H%!~(n9aXY5LSu;wBHBHApUf2dtG@Ph6JNJwWHAj({I@%Iez_iU0mE 
zb6jR4F-=V=?=67SS2-N0hsOKolc!(s-k{r%J}Jl2fPv-z9F_V7hTVM0oaMtNQ5^qK zh*}j+EQ)sakh6RUP+^7tkQbA!?rq;2zH7Wsrnc}`7#o_Q_4gj+qEZ7k=tmg|s62O6@Q2kz^qz7{~C-K3kAEEdDq^W!-u z+S?YFsH>gm;Uh=oHi#f$ncOw>Mdp&#E|GVrqpT}Fk?nDm_SYfTP^;xk>E>#i!H=l# zC$szQz`RVuMBMldq>KbtuBHmBsph&v!`pH=LwA4-ORrpw~L8usrzL(%Wor%uaaQDyjl_gz{}=-oQyx|*8*p6UO21wnq$`T)1Gn+nP1 zIHEB6f53qfjeL}ZzSLgy>kRJgsFYr)<~#ol2*%QOnfK5^vJrc)*mmBj=IsSB1PLHA z1d-&J<>l12*oDcfQKMXj;oZ(^jW*bv5d1(}YSfu4zu3z1+z2wuYOiVxPhOjjESlEv z@;s|L>Vb_1)P(I}=vkiIANGLc_2+4&tfR%Gs<=q}63hGu#iDRW(=5zJ9xm@zM>&BG zn~ra?d!wJ#2q-0er*( z?pTh}o+Dro zoAG#Ly4X0|u=Gr-2B3O?lErm7{vJJk=P%9oc(yUeHq&BwVa4_Qn3C`ahCPgwZr!TV z=zxsizm{ciZ8#E|GR+weKWon%HpI7$JpF}@Hs$9^S8S>mW<#*KV~~7u*ryp%FQY6c zZgA8pBr}Y&8f)#@A!&GK)}qlm1(`86${9Cw?e z33)hz~kTn>L1(N%j zE*aQ0OkK3b5AuGz$U>!_I)4td-&N_+vLnKx9pDt5A^w0(+(`h72`lW(>1BCtBJQbH z+MVjXD(Yy^dWzZN)pS)~&4vNS@}tgYou{S4P620!FnW`SHU;QU314fOFBF{H+TN04 zi#8~gNGzOwXcP1>Zte8W!spj!BJ?;C+_0ImS-I$$#QAWYjgZa|aBb#H7o)YK{UbJm za^{{lt7{7Q;j7B3l7vs`MPG?I{nhuaJO-EYZYflvCgbT(C@zwd7a2D#GwSh9!&m^= zI7|Lbcm}k{D973@wH_JLOyNfq&%TzoJ}B}hCd2WGVi1&tpwwgl|`q`2yfZJs2)E*n5}xALxVc((qhQB5?XBhayxbuQWnT%Mr4Li&#d|)DkIHh|pC)T%X0TGQCYo59 zHu4mxCa%9CmU~hO?VT(l;(+993NbiPjqHDllRzrLU|q-k<*i=`6`&}{lwh$!O2=$ zq|^Hprpf9BW6o*Vd%5&Qt$CI=^SF`ce784mu2oN^>BO!va|W!U#_3%t05eiwEyKfQab}Yct?&G~3&X!WT}pZ15I}2f8sf2Fx22 z2DPG6JuZac;He&3gkp)?hxNQc__oMBl|*NJN3AQo*%xV_g@Cy4el6gdb52+=8xszB zLq)e$EBk|Sl}C}60G8pT^62U4f#%~EdCyjwfAwI z^AOFzQ_0tJ-7J!RN)YesEMoqbMg*$VB7=%A>$k0f)C0$_ag5_xt%8x?S>q!nkc`lX zS%rUzF2D*rZ)>M89?^;&tTF}VUg0xDMhYB-@90e4KQfyH-pMjI{k}k2jii7U^Uz%K(eF80_Y(CdR%kzI~oRvBORL@~_z-K_C;Gr#N zjaxuNa`=>rJVoP9hs-ab^C6}}HKR>~=mTAHGXMmZRuiq(f+x)Gd{O=^9X1~C`rz87 zReaii+jP{DV>bPxk&DM1Fyjheh57$4!jklbD;%I-w(#P>k}N$~f7oE!8Wp z|DQ2?h^la7dqk3N+4$G-t;Gws;!ztYZ)zzw+mBh}HIrP$2{P_D3Hu2l2-kbpTeisJ zh?#0~2Bm@>AxPIPCRIY_d-YH6hUX*Vqv_(=l5JMOQ=w!Wi}{juR@)wtGu28IJNmc<#sp*Y2CKjynu~)iNuPk zZo5VsR?A{QGIE3K&T5lx?3UAS4rEGNMnhH)zD0RR^Nhg$l^@#!kW%r@Z+8Nh&Gk;* 
z8bs^b`b}o}2EI%rxKQ*RuJjRk1)@cbg{5;0+a*Nz%U@J0z_P99(BlB_rqW(y+yi#f zNtm*oT8kuMuj!&hVCVmZs& z_9SRn3=XFcM+ImR&H*?{FV*Nzx>TW-pV1x>@15K8?RH-8_sp)HIHXbvTM^ntutUAy zo_qw$rEd_I-H@$BEQs}h$A*n$V@In`m9zoKcgcX|WPHHx=g-11fhnyo7JGzE% zPhE87%Hvyd^=DR?LULp8HPkLYcfuqlNJP&@^qOoi$ev zd_GMsT@IjWwTp+#I6LMcbO>2`sP%mEIPChSMZC-NPiRp7IJT9c_0H)QM;AG3Y0Gx4 zCEK*)td-y~RqNvM%b8B!?b9WVk)jW-dQr&8m>YJp0gPV)%(eqxfA4LJ4vQxY>Bv zvM_+p!P2JE(3%G&Xg?rGQt6-%z;cl zk#TxJ-a5?m06b*SQ%dw;-p~K`nI6)Oa-%xT>La*Rx@D!Kycn)80i3cnp-)}RkFKM{ zS7j-L?RzO5f!ryQiK@~M^eT`Aka7?2$7wD@SFBdTbN zdYWC^S*ee&J_5(Lm}HlAEHxSxDzgEN-c&P|j)2EksP{Dkpe8;9g#+3nY-;z{W^*$Sk411FJ-GYfZRy|&MV42yBOTb}v zf38uj#w*L?h$tOi89)U)$j4@{b5X9*d^>Xfi|$J>%3qE@0`-uDO^T=Wd5+~G>FRy~q`#^fl z7?icJ+WK8ym8Q)5@T|NYg13Eb5$74Gg)vy!QkCl$fu<-0$pEIOv34F9A3lc~V2UPX z8@lF4D#edPZF4W9HszU%t-XQEKv;~vZ5sFyp^T7MNKE;{4Jn-Fp1L0=t~6m{aR(6V z@&M=xWjjs$*9|g49isGbQ}^^UnW)*aZ=Uy z`Vm=0k!6FxhG!n;8vXpX^Fyj#;i!)5DK9Cl?p^Pu(~FCgE4Dg(GmG0l(qWBV9yQ=M z{5iVW#st%6-ga!hB`7Is(480}bn03odSQvG9%hL#lftWs=NVaM(mClr>3~N&`p79% z<-xG?;M2;bTK8GyuxWMGpM=e19KuFZUm`p9qumgvco(*ht=DXjNnl!QVXDvLM>`NN z$Psljb_rvp#kjBEy$j^}$=x;qX15ia&6!$^i>ed&AD)M@+CJOod;H!#NML`z{+LTp zbcPfK2262;afwtoU{@WFz)`r9@C{j~VT=^lWqD?ee5-x#i7r^ee>gen$Oddv)!%!c z$M<@;OAvXpU!MP50?d~RuqI+AQdlJdY6&90P9*OOu%w12VSt1eZjBCkZrH5f8NFAU zOrS4RHH31aQ1C&BMK3a40k09k4mWf$YO`j$yux(1^aDeX&Z+?wmr7yo=BIgdnM zId1XX3L*-UAFw{4*5GJjs7VQZEt=s)4!Vx&IuBTQar^R3tgo-DRhrx@RaVRcRpS`V zAI1pJZrk@Q&lpznkLq-}%$n~RZFa|On-$JJ)Ab{!+ydrAGgh>}^CMdK;a*ltg}x_*;3(wej1519FI zcV#?Rya6fKyxFUr$cC~n!ZSf$SBxG#rhw{`xSeXpGt}SN-YzY8we<%{Yn5XU*s0^( zgIh$=B1PROIST*CAta7Bl%=3>re_ zpV?gybkmSYCwtm+xk#?pK7-3Dd&!la6T2ku+PZxZyq`3DXddR&5p}H5{XLja_jz7v znVWC&?V1F+Tmt1q*G1n5spWbR`p@-!fJ8w=g~$p1ql^_F&N%z<2=OHcT)>N7SN0;l zTt+w!5MoEQJIF9LtP*<@;&cS~BLq(7g#^!jd8`$C&RhK`!fq*ZLOR;3_kxyWGM!ZB zkuG}p&ZxI&T-nPu_92{f0Uks+l-^FtdUD?)Mx7s81GZ0WU@6w*jNnu+|!0@WY zEzGTx`ca#78)+~Lcs7qfadp4h_?Q<+(zl{l*pqlGD;TEc<*NUYQhRwgNH1s!cD0)y z{rgPUkM`cxmo_fihLsbjmIB*@;z+k4PaJbOriMf7U2zcAd(X)Mj#Qq^gqgux?e~Qw 
z)oWd(qDMYw#jC<~De^D<-git*RzHn%{s!he^` z3wSOJze5+{*b5ONcxdGJerN{VtXr4st19un%bjf`xL*? zuLoVAgHXWGLBJ@d?%}m`tJ#gaDYlD_m9>#Hc2SiQC}kWn=-Q_?9W^+%8C{cx8G`V(0}~&X#z4`ra%nDDaXapxWOkxsIg2f z5#^&uO87jo=#$M%$`PBUyYI{Oyc|qEkft0s2t})PiR+&>8yo<(9p>lPOAf7a@4wW^ zwYp#xj@q@ON$M$9l^}>CG9fg;Y|{xngv2EHJ^19?wb_NnGRQ5Q^1M)NYZqZ7$syGs zE;~`%y!m)r zr`Tkob_O_%Dy?-TT$ugJyr}*Yi1#X27*$oASMT zuk76u?S+^yIknh?y=)FIM-*d0OX(4jG4K6xk4?%!! z_B@wn>~9P?u+E_QZn#{EA<%1s$C=My#4dNxgCbDL99)#fW;)bu9|t1fZOw9B)i|9g zvz@ZX;NTiTT-0g67Q^Zsvn_I(=Uk;MmYEYL(R|b2HVo#xyv)InL{Y!H@PA z>I2j%4Zdp2 zBgO3wd2EFE0$7Y87AWkdgxsquBpR+kY6GpmxZi%f`bK7H3O*$ltr8ciKhlMLK8Dep50q9^K!EvtB${=WDCdX++uO(oJoXeQodmvD9KR{2*juIkMNiA0;JhH+1Rc|{Vbg-#%Pp%UF zJ8L)r;|#)v@!PE?Q9E1b;7QEHpb3ZZ{HEHxy3`O>UET1yBW-1vUgMOs22ja5Z^7&*?`(zUk zROD5Zqyb^6ixT}zqyqY2flLRkgLFXYxS;r7wdky?KZ9Qpib&{EUaAkwYFhgrh87n& zDvZtUmx&u5DBpR7^9OKE21x2reD=dR1~z7g=H#f~Z^upYmOt{0i{KCp*L;B#YA4wnQ|`?hAd)Hoidt12zs$XZ~M%8mDxKj$l59ZMNZlm7L#7IF=~)( z^KMrSdT%Y*Frt?btB#lzFN2nw{tc8p6~Vn1s*;#S54Hs}--UAqlytPzH!^Bv4Bl5` z<(uC75K?m`jFDjB8}(CjO z!sq?e>=f>#SK!b&u{Gok=a4-d&j4lwe9d|DmA>5V9p%P~6->BKvjp-Z1U zQA2xmaNNOgG%%h=T>5bmG7-WpfsoxvAizDdw_VA`8YkB!ttb7)1|)vnA`@eZiWo!D zc^jS!FV84rJ#ZT~;GjL(z^0XT;Vo@^AMvHiC@DB&lZ2s*NrLzykWc8NWx>rHEV-60 z!%i3VQ$zq7MlWFLxWBst=c6j2?f44)i9Ak@RdQZ>|F@KIxJP@^V$RhDJh*d0tyx{L z9QjFo@HMA9W=A~_1BQuw6Eo^{zkhq;j#N8!v2($N%Cjarjw$oFH*(j($f)E|!THyeK_oZl4> zTP^OuQ#7+BD&T*j*9#Pp_gZSQJxfs#Rv2OleO5x> zj997-R3!K}#16(2Wym(-ypb45&5u13KiLr`h6Tf-_;DsB<6X8UP8!d;u+kUYyk}Pr zTc}lYl7YBGjHk$T!L~f{HXrph$8WqE0Pn!Kkk7)H8+Lg=n7;^)qGKUv;$H^WUO|#< zfM}z`Kz~DOBwgvqbTr?OZWt`#X&LxYv2Z;JHlj>!$SfX1?A(b87_xksv8dIeE^24E zqJ-212C5OOde3TGIAx!A63vXU`Y@QN#rQ9ZA-bORjHpxFfWpoCcq(MEK#sOxom8A4 zZ(MlH(#|g0sW6b;TLe2yj-!htAoT3ZvEE{z855D(Y85@ckoLH1nMVGVYm2b;h5*#0 zG!9ie13elXVrJ9g0ki>!e0aWFk)5VtOSNKU@+XL~zZc_>i?EzQ^?6ixCj!7pg}p~_ z{)pP>O9e6#C4G5vYNwGp!{9-jPAhtmO=my9dZYq*NE8HBtWdt=nxkH&Ta+weyD}wy zlR;xL)#*MP(>sFu=+Cu2eF2xh<=xV+b$qnz>+l20R<}~2b=3iRZF)c{{b6O;W=y&( 
z4sHHtN(TG2KssYnPA4}uF76?_#aR=ZvrTfOu=L*Z?wsk%{1|acoWv#*tbqlgP*2nH zYd^rc2;A@v&}H`qmrqqDs68YzWfn8_W$s!0qWG-U>6o!sR>4+Ok7 zuIt)n=`~JU#v?Xw<#}u}%zpe$QWj>vywJ`Ue|RE0H!=fMQetR~8+~RTVS&vv%=#!( z-V3NOJqhALCtH{vRBwi*(vQ9(T;=5Wz1tHq|J%vhL0KYktzBAus7K_+;}} z9_sqBhf7UNqTb$KB!Q_xeb%KH@YKP+LTZp^a5u-?I}u#nwS9Ouwmf{RP^Y<3zU*Se zDDWcGq-dyzQCI6~mz6tT>n?&Xf;AXV7?mjR9`WQC`N1Wl@`e%Y;wrO=c=Sj?7JtIk zR-6tIxek5)I3t|iIUbUI#AYB%p3Htm5081{GeU9HTeA(w4ZJZkToE2vxa0f4LrptN zY_DK65sKtT#zdcDpV)AtfzL4_c}^$4RZZt>dM8s<*d(Y_8ZP)ZR1G@Er4Zs7Y~?T? zGz9auC@ZN7hYl9sO#b?{h_IwMpAP zF*f~1Q$k>ioa`rACbBPdoWJR2)Ww+$y4WD?=?H@JuS(QBPvLp`Zqzy z#D+gKyhrR9^_f8k;cm&!g|NJ;P+N#Ok@8b(e@f4dToXeVpIBi}2kIR{|3J_r*bPxI zZ})8|E1?fpo|TZ|>XkUc#F10|z+0*DkEl)oj!%l$XJi!X4hA7+2|27>Qo{!F?+FWa zNSV4Gj!LzCH{^uJnQDo(NlBIvs)DXGBnLlA$;r~ksD?uk!r;RxD(%y2Q-CmQc}YeJ zdhrXQ)7l4hoV~B_4MDsT=Q0o+5&dzh?9fId&?eg;g2_2(vg#m6u)v_OL5-0*O)uRDh6`b8KQ^7T^nKwsve1ZBZeL$A^3k<~!3=rNjkh1Ih$~I}t}&c!dK zCkccyDT7?5v7Js|;367UQV_8nh^tmeVqY`I!WNgMhv$$s+;Gx>H$+01Xcf`Hk{93< zW4EiH71;MycFWT5{V9SlcWF<&b{MD~-?rdV(#+tYmk?>$OXaM_n08bg?bVlp!f}qc zZqabq8hm05UORFSu;98Xne%c1T$QG8SDr2*Wor*L9pL9;pGV;Mq|oPRdroVhAQLWx!1Y z^ne^(L{fi5I3gr!t5Md%UU}6n<^vI?Hz;=m9IT%lM`}@Sy{4fNfWl`Y*s3X=fgY5XPr`);>o${G-iVRm_l3_gzl9Vp!bwSAdWzdi0c{I3_G<~80wqMt&8|VYMWTctG zjmv*fJ7XU4H>luCP%T^rOeWcM<=fB=T@+rCV@^2@PzZDp%d|d+8)z|j-!o)@%k0Zc z%4aH%1bKedj5~Y6 zNUG4y;{T%Tx+>+s_TE!ETXw`46fuHDupe}5&oO#XHPjb0d7o%y(B?C$KjNWJ1b}ax zYg~P%JYLzU_XK|YPBJl8ZcHrPRuh1ASJ9>3MxtK10h656pI6sP4Pp+)#B>_A4bFB} zCBFV(7Sbm`K%qH)H#o!?rTq?b+-LxPbC%+oNQ^p22w^^IojXrFPB)P>P>HV+=`6gZ z#UceS`Kx;C-v`wPSZ^G9s2`nWQEt z2ykNvQV^QiG&!RaH~(T^{)a?rq(ZSMos549>ruDDO{(i)K~1H=oxxjhxYjqMv>LMyOKk0{2 zfBtr@CL75f#k_8BfZmYK;p=NoKnQx2R)*%%s7%3#KS47Ec6ER;rPiOH-|gJ z1Ft7F`qnr}vEyp2q5zj(NGwVjG$#Gf0C_cf&`X6%H9s5Y3dX2j$1ETX?sUuf<)A*M zOuV#^bCtpG-MR;z-E;qK$+n|C(GCA-XDJxXP=5)ne6DJ*~dS zuo!n{^;lXQ?b!2nlzThB^#crWv)n;vCKgCM5m{(pYpNg8C@-g=Sp7ka0|?1H4RLaI zB*ZNpx^?E~mg4v}X#Nr59w&OeU$*h5yW=B@^n0BY z#5u7}oWh>qi#^H{dZ_1cUT@nE^F|N?qo0%RIC4<}`9O;0pH- 
zU?`ye;OhF~yW)L9G@^aX&(_tGky5{X(Msi#xx(Xk&1-qm&dJ{#Nly!>Cx6Lq$eder z&BD_fcHIDF!F;PjiyYzQCV!kQ9N1nc;Ts~Z1sa$n^n7>l) z{7G+SQn@6LhP^hWNQI@qJ5g|{h|Z?u8+KJ$=As$+>{`!v5{ciU!Hy=FD=YLV!;ll;1HLILVb6 zx=NrkwBKG!&8sP&>4a@ry;4;|YqL_eb&BMCQg!_(+I7-mlN}o z4M36{AGe-dZ^fOgg%az^Ut|5ni*2&_y^yK?3*>8{;vOD}$R{MwsTJzcW>^^*Q0G

ZGPtS|y9U|{BjX2u{)`qK*A^Vx7 zHunpT^rTSPB(oMLXAqZai$@_J_OIdVf6`80i~z#K{!n?0*^B@CJig+$>3jL0K=dP@ zr~3*6^%fxZfRI{3{rTInHiQwTPr?Xb|MT?%G|P9^Em^M_#&iIk*j~V4`kz=yY}{Lf zkCCwqk@{Xk>^haqdwKQ&O2@m+0oN>E^$cb-LjkPSc7N9GE58!%?>{{(hxuVU$z;x$ zq{ZtgCzMEYRrGr*;s`q`L8@#9Z+#d09$7wbai4xKdmuo96{)+%!NhA25t@1ygVgTuN*8N{Ego~qIZ=n)`!lyI;Ib2^K#b1Rbkbv`hT-yCd)n_~5 zG80P`40k5}HL(7E=6_d&P|6>Vem$en`@cN8&MPU zmBs(o{*nF@uZ8_zZzx+rE%^1tQ>(@}>=o4=q9+{>XVPu^&7jxu-ec%G%D>#m&(!Jg zceTw(I**1LSDE}C+jjtfoTv(#&R%5hx?s1Qp`UHC-*9l0aUtD`@E?q);=63j`4E~^ z1wVXVrUB4;RcixrGOQI<6y^OvV#_YbpCMC`4DqdqjXWDrzG&`;OMy2 z6NWzg+?$X?2h*nv+`*xYR*O>!mkZ8G?+vB#9GZCj(cbdq2yKt&!D=$lyT#as>2s&f>)0 zm-og-=Cfb@fphd0Y+bh>gg&=xm7jm5TBgkBRw0A)@bK8|WVpI)Z9Q0eH&GAPW%=Bf zaOJ)VD=9Usf|()|;sT)%9r}rVy@Gdgn_zKR41*q?A5P$PBJb2`*;TOq0^0xQApYYM z$q=wY=34#k_;b4a&u;a))aATVcsV6CU;caV!vR+Sx*vu~_y3s7|Fb~2UqESaGKr%9 zo!aff`NArnUj`CoYk0~v%?40D(X5iKrV)h8{b|dLNllS|P1XKarhd}~(lvqpz4yziH(@oM_K1z}% zrjFJ9B@R27T9ATv#7+2DreCxC`zotFm3(_C%T#$>pYtXo=pI-zPQXn$X(z85ho}H> z@+&`VpAe*RHn4tSo|d8~w}I;I66w?y4_sO4Whw_|tNQh4z|39$m6HHa0?^wX&+2u@ zSxY7O+e+2d)hOVE6s#xOW}IaXosu5MPAQ4{yti7MARDbMH&xHmEU58;RehlV=foT# zdIf|2vvd7f;r`BsU1IbV=+;SpSQ`H#)wA1}ziVTk$Dam|$^6!VsZr#UgO{E+oLDpv z;eQ(jPyVR1rjk=&=P$1%v_3@sUa6&txzCUlX$#YT&{`Z0F_=Wpa)&3Uz3bbzcOskv z$$GpuYoW4vVHte;)Bq5yqf;g)+M{L5Guzhe*iUqzU8Y*r{rsqOBgIwz73zJu$Nv7s zVeb|$w%Giq_;gPtw%m@F<>o*nhvU<3o&#-d_aG*X^+oR+p4svg$A03`l#c@BVieci zCpN8nJnO<=dvx^3#8{pct6YP%*W?Y5NfCH%tFDt_zuY>v2O|+kY{TBbphGS(Wy#_* zMO=DTG0W<|8S4;ji;3l!9sBCNU_*^<1Gju_ob+MntInV#j9Y<|+nkL-D zwznqO=J=L)v7BY42@A-hSvqTzt~OG)cG41DE4D*8Yu!+9IrbSF5Du_&?IkClkA!B- z@MZ0|CNpIoynB93*LrUmY5Ch;uTf!PNxNW31z!5qO~7R|Rb+D1l)DbIb-zp*cewJm4jT$yIESj zIt1x-?s1tcfu(xO*^%i=Lgm_AhP>RZ>o1JA);Se+Ies-^2-unDo_D@3W2A7-%4R2y zK*G5>5tMV?C3aplnSN~8+Ti}60#IiVl*l{bIA5J?T`E{DI7{-qi5GsOumjh`AvMV7 z0yy?BWqxq@JQK|~E3P*4?A|zaPRC~TG5H#8zngEC9glmlGY%FtB=7C-mXt9tMx5W1 zeO;bGG%LgRTPV9;gQB)N*7ok%rTg6VW~X6El^Dj0G#?dXCY|7{3g6D<5{lYJ`}wZs zURs=H_&YhD>1Dov{oQMfo(N0E{P3m=J&T?&+HdI2@8z=( 
z;e!|;HXZ%F@|@SaO6kYL7J`IDkk>xul=#^(Y-Z55h!2z|4lk{MpEJeiR;Yr{TJ&Rp zp^q>1WMw+Sn?^b~r#*G6EZ7-wZH&g~Oe5|$bGDv;#HJs=jN%fXl!dwa#&>4QWyzkX zvt?m3HNvsnmMrj<_5sFzZ5V6BNh!2*zxKumbhAZ0-aAL#_LQ8{IU}e~ zXA8a7Q#Ln1TbcszS9dtOCFHu|5`v`nD-J2qCs;^Wl>n9Qu0m>WAV}k7Yak8?9Zc6u z-iFaJG3sc4OQoRV`@$#hrjlI&c$~#xTj8cee?==zZbiwf)G^4oZy?Um8`tg5{Axzz zv^y?LOUD-!Fpz*VSZbw0kWka1Bs6S@^teiI-NM?bZ0A(CczNi23@M0ZWE^!e*c3~P zS%aR_;yO8~LJuiuI!CL{Xnpb7scZuA?BgRyF}3IKNFpaGt1$&meO{{E27E!qrY#5X z?pO;+oK3Y|T>RE;mD7Ukp_UP+`bwr&MDHoIU6vF^dEI=V)g-EE!jvB-IMdH6ORa zMlJRaVg)j-t$KRz$Lp7#;P$Ao`wH{?h_m<=`-;yh0XOE-_vhs1?FE7j-ouBJ-|s(h z-AC{(HJv=%oriR|=U6sB4cbidSCJ0L;G8EsY;;?Cx3e%;Xm|7g#~l>JeWT};7mHO@ znOg^XRd)@QnYc;kO*995Dz$51A&B_f*DJohjdp7yYGtYnw%Q&53BMGGEKr;okyn!w zt_sa^dRRN!0^~3K?P&i45_>oAQ4iN$qqOQ8iIDf!GnGYAva77BW8Y(pXZ^BMCujqO_=#%p=kWh_wmyJWA}W6N{P}cL{QQc`V}n{Y+<+T&{zYY11~& zqpdc`cwCTe*S@^y-tv|@&10!K#tD3vP}E@Lmkdst$YA3#z+Z=UKs2ck`M()ivIZQ+Qsw`DJ6uuYZt+qpboHr3HQhq zsYkoKK#9E(wPZU1=Q7V7B1}%~+X+vVTUDXA&mv79gh<8QoaIAp0(7y#La80yjBYOA`=piHWs0`d)4$7uGK2$aqR>-N&tN z7U)b{ROqlup=%0(u25|R-6!M&013KZO;y%^p%ubA-}5+GEL2<*KDgkmo<2^J$G;d3K{Aug8GR zs9c8Xf*~IQT)fj(?=)K_V<_#UtO&CIPF~{oGQXZ_Hq^Y|KIRdPM|m&raU*?pvDI@htclHLwhgEzzBnggFa`WT)QRKN*c)i(IzRWlj*h&Ys;?RGqF3Kx1`o)PI<$U zIYnWKv)eyJ`pNzAtbZw2Gy=4Sw~#o2lpB)GL6*-`O)JXQ_5I6_Gd6FIqHT`PKXGmV z!oHRN`u7Jo*!}pydDhbev_tYoQm?qhTw^i-;Q^}+q5<`T1uUarIz^1;QYl`K%Sj+V zqcvb$V-caoCW5F9@<}pSvfdYwukXN3*>~7wpd`BU-l}}V~W|%2#?JOVhg$yznU9E~Q zYG|r{Zyjf7^g3e`Q6J~28@WvU(b4d8(yS9WGi^b9)8oZ@+0Jaq^Rd%fKwB;BLB`PN z!PgjwyQP_XZ=qHAFs4E|2%8BZQd)24t|+NQ9p`ml&_>By-VV(sZg-!shEDj6_wMY$$jp7#L)tD|qA;Uv3)VlqiY*Os>`?#=Bmlh4>Yn?&3P*WsFAseVEy; zFa0`*QSU@FPrN!To4av+vT|183#06$wq?)zO9b~OLb>UZnOuyZ1qv0?OBo-_M%9*i5Oiw#<9|Mm?3z-t`S^UfARLEGJaZyn^K zjB5)s3zH06AFK*CA@q%@9K~z_RX3}E!KqMR+3*sTJb1h8YwLuVgqi7E&;T(ACnI7k ztoCYVpCFr!RP=Qic5??IyNXd7>3axU!?nCYQzKf7EX>XOVy#?kR3sb57AY@M1OqI+ z2NBx}^bcNdU9ferm_cpSml(MYK}gDajakfOxTn?*Wu9BXb$V1cgS8zbgoDHEPe|yi z-z&Pn-NS_VUK+kvpd|i#;CTpW#~5$#Lhqus$M@szH#^|Ito1TEr`aBr+WuxX5_Zz{ 
z_CA@h86)?+HMjU^tf|FJ%K~P0Zfn~t)8OYt)t(bJYXtm>(k2{%uO45-&5IXwz`iuZ zkeYH3((azgMa!{Fe}>Yt^$7?VaItSLrOGfeg1&J?)U+Kn*AtlB$7@)bBxVz}8T{Gl z>=P~c`SRwsE%Dd0eO_NHL11&Fbk2@P+#*_>E9w*f>YIB9H@Y^OmIHfTIGc7sBUn4ZFszFuO>4aQ##_wv(I# z2ZI@b-LTf!aM-YE@a%ey!OC~?RAcz1@Kms-aCJMvE)yeDo5Jwc1zh(uH|h7HB#DVfn5BVJ8Nj>g9)`6Oku<4#r81l*nAq33&HTnHsiQEG9X^YO4g7 zzT?uH!U+~bAqA2OMzYJKBsQ z-gnD*7c4!-Lbn+dXJ1Z=b)5M4Ua@D5Pp#QnN-iw;bYA>$ZGERjv;_xOrI}Y5BS?2c zCzEBFS7NhrzcKxc_%Q}S=28<#+5EByp_4b5ia;L=c?)dJyHI5~eSK`G(_|B&Nzdut zCgE|0Jv#cuwQ@MZy%DKc9EO=P)TNlQTK|o_jfx#5z!+|WLq!nx)31P+N?jbZUaO6X z-e6~$ZFftD*1U$n&x5{7Sydadpq0T>*gX+ehlUXCjG-eM8lKQ+b6?se$wc8@LKR$ zlsQpZX#H4&%DQCiajwOBzx$>s^Nk|VhMleY{3$;0C(BL(L4qF>ChnK=rEEjjN_>V1 z7ZY-9n+#6$_FVW+JQ3|QEBGg`+0bx(B@{GxeM=v<3gQ=YdEytZi#qTT&mU4IP@62a zShIG*3u0R&i+jwIX-4PMtd0wZ8HA3zTD{ulQ=?)4EFOPBZ**@c(28Sa9W6SX8MQDi*P_MV)8njIq`+bDK45X|L^@|*;f zS*8S=8n-y$Da^{&Um;t}dcUBwus8VBUhgeBfGQvIA_0~L^NVuNc;_roY%pe2ZfC^f+BI=V2$)D(sEBSXhCeuz`$bAyoj*lSkI#za z^OPuS1W6^0q|ggOSudpzb}s^oPCphX+_RAaKvio|iLaLxh5b69cVl@Kt&wiM+R7d3w+PvpuSDT#00QY5G*?{?Ie(VqA7j5X4`is;wb zhkJMN4CnlEO5?@qKjcKDx73<3)w}iFYBk3!8g&X-!V{ z2930hnSjsP(IB?#0J(&26V4R24v6i7l_W?ww_WK7L+y&(EYSIY^Xcb+exzhgb&Kx{ zpMA&o&b@O4^T}Yy2Uc<)Tz{{v#IW&RQkg>QVy2Pa>7702KsB>8GSKcWv9ENb2U5}O z&Ao`gK|Q}>i9&P1wkFGv2 z^I%GeW?7*0&>#A4Bb`6Z@TQDvcHiVN#ZadG2Unl@GA z|PbOn|4^0Jl)l)|a&pwjcFz^dbdoX27`4J1bH{>=4 z=(<;Zxxtg@TwF?-k>7@%_H}1n8i%!G${gDTsTSc^1xqy<>U`&Dsa{eWMX?Qnu<{aj zzZ>DbB)httt&y`QuH-*5%X6eL1}}HBP0Ng$VIbfDCz#HhDpdmq4)O0XI49Gwy&$a{ zms~_MeHBcaJ}z-cD}A&pUjAg{;ppJ7D7T-KqvkA?{u84bVnY&0&etwXqjSw?-URE6 zOCuLSJSdDvo;(!De7`?PmG#q;krR0)l&q}@@vHl<@J*f%4|{-gbKg?U)Dj1{wkVd;0e@*=|G(ZSN3JkUrVepCNs^6+l2H-hum?jA;;I zoS!(q{7xDp6;>NOoW?mOp6jcBXk~Oy?70rp^Yl$-^=Lx9?eRjNOngm)@X}|WUH1r^ zq5-`(sYPN0b@Sth9%eh!xD+=n*FNfEMY7Lk)1`^awvz#SzF6NnG=JSM8|)0+yzK2u z-V9F)ab#Z$iX(o^oz0QjlJ05+n4yXGmd6 z=b7gl(>~&0(2LWsKWzG;dAqIyUd)XR4zFhO-bx?HSee7Z27|b1&IUz8k-0 z;r((QvuANj^O3Ok=(BdFy+K`xw8{tM-&Sn6keyNqqB>@lY>A;&-5;j=2GT;awq>C) 
zl~!{>Q_kl}Ls?Yzv&q;cO)hQ&z9?q%%6cc3)hP`y$S;jxNAC>eU?1RNUVQ3I64Q$x<3y5IY}5JrzJ|QcfM+bTN_M5fC#BJ$=@~C7 z@pz1Bfe4qeX!6>ApCW~c$$F1@7n`$1Ser-J7)q?OSR2;6Ml}K-5=#!NEH!rWfOjwV z;k-p8oTSqH52QQ#zGLPq2TxNCckw=>X}sb>El#%v`H@lcPB#&$KOZ09v!Em5d$u%a z=3E%G*rkra#4J+rZGZLD@d=v9c?@z#eXCSxY+m8q^Z1fAq~_h-x9nGtqcEWXb)UEa zkjyc1T8e^ozno~#mNM?W>njZ?Rc>nJWd|RjF$2Or#lPe6*kz_A^g8{qY^M;ruKLBQ zRsgOq1vpTf%5WUvc}`U*uayy)!FD{g5Bd><`dM1#My73(Zj zbAN3@#HKZhY2=)gdYt-U#mwpoNXbaqpO}2Bpyj)9RsAL3k(GE(rtP$%GMc?nZJl}L z#Bwk%kj&IX{7fV&8U+e|7dZw_cno`=0-4sFE@NCSODgjme^1@L=92?5C6W0%3De7# zk%RQuvBy|7lyl_l<4~AgT8X~PI0X6g0EnW6TX9;6UH6YETh-m-U@_G0-lxK3CcEU1Fno^~q1 zIyXDW5KeHO)CC^!p3ZCM2JU5&8I5DHEO^kd)x6AeHE{($ekT{%{5*k6NTFY*JFF)O zb4uP1MHNt>mCDEwpdG>XjK#~APD|%Y_1JFam8-MP+QP>R=wt@-aG7cc3@~hIl`Ijd zHTx3W<;#Txg#S!A4P23Z+4yktU}<{m?Zx;rW5Cz_8tz1ZmyT%MA*3$M$!yiKm^%oy zF;Z1TmU|6CkTSXkmvGojJrbKbBN9&pM|43;O0C)sf{;jh>9au?^L&`cS1iIpAjS3l zsrAN2%~-D$koFjk{Oc*A)Zj;zVuC(KkBZup?wl`v<^@8fNmCMy#p0a74{-O~( z{xeopvM8YsuzJ*8K4gh%E=45Zi{b0WxWaqFKT^qHU6x)sMEQg{=APD~U>E8Oj^(vB zK*R8W0!3n_w@^oXtqXyff{TU_s_ZEE1$_jo0X=pWP)ElHZ3kFU*^uYd1z$(T3E(*3 zaQvh;5ThY=`b~n6N`>$NL1_k+A8c7J=XtBGN*I*oGY@??k}2)pm)O6%vXd!Qw}<0* zi!D{Dfv!cQw7VCZOPI6+BiNhi!b98KoGQ!+u8cXqkPjRWeNnPnuphbu6K?Y?nqeo7 zfssI5gUl)Wu5B~&_Rm}Zq4G+%MSf#e=gi}W1oWr!S6C>xw~vA_J4EQm=!~eb)s7z10!^JN-p5Q%;@(ra_mK$3>Co$#vt|zr>9gGhC|(oGwIk6HPchPxaFbkyt8y(_=>SuGG-r0yrQfz z`+drb_MGMAYXPI+%VX_Hl;Qot9NtOzdRIcxIb%*E8aY@vIUH5=@ELi~85E@5BnZiX zTC7GL5h+KOdU)W>0U}T_+Tlh{#-l2ftA{LspFDoo{p}@?oi|1zxBu((wg?kzxPyDV zo%Llnxz`)7%40SGB)nmd9}*0ihypeSwRxjl~`)8-eu`ucdomZntOu zyl&bnWD{oXioSga{yLEkGC@o;gv#`)?k$EE1*5OtltH*t8VThropySVyo8Tvmq^jM z_iQ_-k9zCS4cxmSgXpfPJsNrxWkIGO(pflcI5tR~ypw9>j3q$`;N+0f(D-^`rqw5uY5B%M&p?5iIbF)PT11}qOp0`9InU$>4H%8BH2U4_dj~dJb14KWAEqP z@$g`G(AwH#oOkO73P+3!^|FA?saf#*1XNl9FoSxkM$cBk4rm7bkT)El<4jJ{o^;%k17g8vLoYtjVPrs!nBs5 z4Y!{SlE$QA&|W92Pgy05q_zn)) zu7bYbi4mh^G0zdMj4)gHD>G1#q@0FbF(M4d8j6NmLJGE(;$q5@u6vm#Z&eD_Ab&Ik zt#j&02`a12zAcMMEM!&;&DoT5Wo3x 
zGVuy6WZ?_INDiBGwJ!v_L(N<_>st`ti2VR1boC6Wmr&ci26ry$?MzZ~C5hnqk+0La{k1 zN!5YfAq%D7$-~nu*E|ZJ7~~F85pg?}oqnzm(IKUsbn0>@#N;nA`_MDlc!uh5BJIo5@*g8f+ZuU3W1ypn zQDnI;yZcNb?x;6GY@k^)w2YN&)V3MW&|9z?_Q2>dS`(57edq^2BDzO~VqAyHA^*}e z5W86lrj2~W&WkBVH4o&lem6*li*L3}KihfKuy8*T;mkpAPs~!8!z6lQ-tv&{5zXln zzI`?^C^7Gc!t@?`4R0g>0%$Y>r|Xdc}mZSUB>*FOQRq3x^*eqIhQ9Y zZvAHbqM%)_|Ch%+8?5BmCBPXuu%`EjYBz=3~y|m;G}0!4AQN3M#Ydy5Na5IvSS<}@e>&;WQDX(&a{ z(uiSi!BLz33Z@k)JuFAxJLh5VcAp?7T^xxvu8yVCXI_U=Lqj17#|5eT*C;4Ye&WiI zzVNe|vT^14wY?k84w%1eN^*-C-oHMuac)cw99?pf++28$x+R;ke_(c0ZX=V8R55L>j=FL}%*mr^%`h;X?At zs_!vm)LG3bmiav{Y#__%_K!g<`#e|^6B?mkWjC_&o-^p^ih46HJuc=_2WFnzd6RbP zzb`G}(YpM4pyXKi^mW_c?5=&<1FRq6#H)vPAUGB z_uG6t$7@5e^d70xQXsf@;}-^5q>Ci+%9WbrSxUdA+#n0&qBvzzq#Png%vdlqgO%9AuZkxU$Prnf9QT zey{iq4LcLw35ho;AdW#ZtA1JS!9o!a0q3NkyX8z5uWj?zD_IPlhteRoQk9*&&EBAi@pvDg$y&b3E&}{${Pi*X z*Xqm6PiB3WV0yGW?9|^9hD&*=m*;l1NHtGDhk4mX(iVW$B{PmiN6y9IToT)~Ss?s# zh)M5+DD}<`ty;#m>WKEsPF?v#ozj;1abqIp%{}nx5HCx&MeL#G{t&~p-5fnI2MSzt z*O(^Ug;HlJ5S5xcSQb0f(W#tye=tU`Byk}cszhCU{bY0 zZbsz}ogS}VWc%$>;=MN=NpCZ_=f*QB=x{pk#n!tuM-3XkkX*dN3~AVS#BhIkx>*9` zZK*87|6s9viKxeVN$J?6`RJo-YFI`SWhWWcJ{$d#ER>B>E6j4yWGzVcp^=&3L8p7N z&_L{Yb0Q#i$&6M&@d(FChHKD6XKnxP10+CYji1N9UuWYDDN&U+{qXIbnmVi!sH$ll zu2TY*SLmB=Xf;K~>SNlT2@f8oG&J&amK|913RR?R`4?5x&Fny=p|pKf)XLe)zrxU| zEnnlaF-~wsxNB8JY^kpfmReo=EG7c~Fsq4SX8<_u@}65CL@3}H>3DkQRJ8< zzD)Gn{i|8Mo1#(nv-X-@nu=nIE;aVs1_iyZv@IiGSQeDr$@Hug>^N>Yc;s9fH80B8W(2@NC*i+w zsDXhuP`M!9^uF5!^A_zsFjfeNqD9<*$;`fQ%Gyf#lUQa{|Kwn(DZtB;Dd~*CB2=Go zzcA|g?&gGIx&0{r+^yINgCqmklQi{Holq4$^;y(eer21EW}0LArf0DI3&rAEBGFm? 
zyX^mr7!S{UMX@f^lkN!qKcQH!jKtY1l()X~Taua#D%FgGP2ccuaOj^^3i#@N_{jth zM?dJ-|L6Vu-KnyFzScae>u@>ffByKNpNyejd*1(#7b*iKR%^|4`o6HL8kJT4NsP#Q zAw&}J)ZDT*rvJ0$n4#c|0oVvqF@X^EPXNml&dFsl27s@4_-GCP_4wHVtU|4384Pod z7p<-*nf!^>ulArn;i#}zlN4Hk=Mx;?L3!;A6S$Q1pVgXx4)w~``~7Q$`|B5btoi~; zlF7bwOaC1ZU>yb)oT0;t+VKBZvhH8ZPRr{g<#g9L@joBx?>6efp{{-X&d+7uvZg;P z<3F(u9PonI!v3!}BotQ(_p0M+G#*5|85bu`)DX$V)N6CA*6Q$bzf3s72iNHDsxcW} z1k5#C@8mMmotUu-8vSXM_+ zuOOVpbT+&^Zl$5N^8OpU2_NXn{}fL^n1RfHDKFagsMxs;fSHLNgtUf`0(YC}(9)8b z{;y&@uN>9f_6@0B++yVt#T+SG#cstsv8cr$Saj|FXhL`$f2-&LGfDuO`sF&X4+Rc9 zgCigwb*<2>XBGLQjscGq5CFA-$8IKHZlb>3*LBY<4>)txfF$CLv$~!Ya-2yn47lI; zTr0V(2;5fvE5)Ml{!L7swBKYN%-5G$&ed%Fo2>#kA8vsb;JkRDURr}C4_gKL7QoXM z$8tU_`8*t{v4qk8C?n!?Dc8d9HV5b~$y446>i=L&fQjDuVCIUD5m?%Pxh^AxC;RJ5 z&l|_fXag))L?iPsKQZ`EtP;J>j#^UzVuoHH`PX=7b=KvGjD0Ui=Cm)S9e@Y&Ze&4t zoJ}b7tv0smPj^>?lp41``|dwrzQ2aR-K(EZfcNmf<1WCz4Bub7;dg0`_y2Ln|NN1^ z7w{z-VnUbyXAk|me0lX(2uZz=KJ&l#=RP5UR}Rz5$@t_4K=3Wa%L(iOLch57#Bw1H zBgz2a4XqkXr&_12`vcX?1+= zwhofR3K-F#j+-{3$p-sb|E)k%=Yra`+XEEV2U8`JEhhRU;gSA8#_?gGF)9YmYa{<2 zm)TaYb7zfnr?me_z(a3;iY zgvYpPSd@QF{~y?ubBsXCp-S5qi?+j=BklUK5DWzy6Q(q(q?1+Is(YQeGB?!e+tVsy z7dAoerY{b3o^u3k)|jrKN|hnq-ip3HA$!)-9`xQ6PnVtcgG$-%HK~$l%x^bfxPtXv zI0txZ;5Yth*@E#LxZKm2#3ul9b@L+zewKb}r0J=#!h{|G8G(jEvdPs6_@kOFyZxNf z;25a?4OeZl*sXYvJl$I^8wAhevgBegxi$?w+tMUuRfw+W9b)qR%1}KtNvH=yvJV|7 zSM~~_ylvWunlp(?GOV(VvPcB3eMp}y{gxB6(%7v*@$b6*XODUXtEgRH8*gz*v6bZS zI{nXOO)|v4JO!>_QY*{rJnZ3eQf{d*&1T=@=J?3vw*jrAMZ-(WOp1D*>atYiJNHEPq1T>4jkjI4=lsG}~n zIM+1{KE=&SGpmS5&*-pRB$SsgoB~8b&&)2rsewU`&B%zt_;= zgsI_`dvA^39I(Euckz|)13IuW(Wlt&8aVE^Zsq|W@X)wJ;A7KZO6h2Nv#Sc(bn5mZo~-(CH-8RkB}OfHSl z6SwdI1{679G6>EOty{H=0v5k8buPmEVzxf(%~*$7i7`MfVbKz>k2`@m>2kN;Y_mQ( z3MkL|nnDO~obl_IeyNmdPc!H+%XsulStiVT>|8uHOYgzt|H|UoXXA3b#GasOX$?8L zaY!tip=MqRH-TSTJQHlvFzT>pXyD_iAY$`8Y@AT>u}gE}`Drb$hHU>)u^SFJjDRm} z#my7AYEcU0m7jMQ!$~4Vs6MW)3=`ny!zLMlu~jr48MGRTF8+$Tx^!Xd>5sw zp<7rA+R4gdrr+8QUxE42SV(%CRl4dciqcnlBRUpEW>y0;4-sGpVWcjI`qBNKOWIN$ 
zWERX~{+oHHVXkjaC$;OOlWs#c?x*17vu>P`3qCA9%p!`7$NBvu;`o(&_k_H)E5k4e$I=w=`-bOH78$oUbx(=Le8TVcz@64y$Nm8r#(T4Sxomj7lu4(zdSRG!rX~1}@-fFmMe32|X&$X9ycmecxt=ID( z!|Iw9y3wr3jXojnkYgpGrkpyQ?ITXv^j)VFep3l&#=SXI7PO2|3PhM=_>EX!LMKLtX+JckM7x4J! z_Yi~`daO}DsNBiMJ-Ml#@H6@G6QPd51~+v`q#y`Hnl2zyNw1K=AzhhUyv-}?g7X&| z?f0R77Q~<@mZ;WtJ)cQf)f)$diKRL^wYNEEn>9b8mW`C5HyOYfHMMiM&p7c7Om3kK zV;?F}rJT@F_2%W2<+L|Ug+I1sjM?p_qBj9M%}313mnn1hlnKO?&_e6VwqvS|8&~WE~xGIEm!e zPmI#pXgWk92TlfNsW4Ck)!^xUip_i?#K_naeC7b{gi+uP6H5%q)Dn2dnJcoTHH8Yb|(k3&To;oMhf z)!>N1V+)-SPaBda$_i2&dSSiuSx#(?O{qVG-vEe~LIz}$uuD#yy_Scwv$M-wR4Edc zr*f(4K3Y+|^-G+Nr~dc>!UTQH7{{(-;~DAf@ihDrS}UH7xKZRer}+=9M&0DptHZ;x z?TudQFh6BG7T{c^gS+6`mrp=sAZTH#!-%1Syxxy(w1=g`bC25FGMQ&Uj6~$K zuNdzuI-PhfWfnokYXQb}Z$v@0BF@a^g3|WGG67&PWstR4*-j+g5c0ZRVw8m!;)cVB zqI17K%?LV=UtoOrRfO!&i_j-x{F#OdF(=7Ond}v2GOjXh)Pp*c45$;h|4~u@3Ky3L zcl@EN48$kmTotM2oTKn7pMvG=`xeADtDomH>q%2n?Dq2fPuJ%Gt|-Nd^o^=N!GLBmBwgM&y%;V}28 znz5Vjrnw&Z6)!O&qRQg2Wpjwgk}b>EY?MkXb%8J&5X>OllS)#zts_KaY?+Q?Spa#qx_vj&*flcwN=guO>9fDb3+ZDk*45Hp~wVkrUzHDU+79gIY>q-MD+nmY@-u`bTkUj#Q@2bNtQ;I z@4mhZ!EZVYK>(hnwSf0#o|K*#|3Nj^lk7JnQL!cxFMDVvj?;91WSghVkLXH0n4V*M!V@_p|lQA9m4LEO$=EkNjZ*#h3B$=)exEJ`VEqX+huT2>D8#b%69JGB z*~o?_x&@I8DlVLyA3k(qArZt27DB`mh`oh6ep0#kH4}dRhruu~CLs=wHMV$E9QbE4 z5Lu;y^C1*2zy^Q5eN&{T3Y*O@rLTCS$cPpTKGY?3ND zd`xiJA<>~Z{iZ&=h(8IA2PlmmU*@6;i zZDPILh8z}7!M(5(BOu`xk2h@F-KMEESi2>kj=#)iUf=kODc9fs26m{YS)!y53eF;* zZwO{>USAuyS7y8M5c6*fU_zV0V`fd7;%7ViKDy3H;sC|7TRyQY=C=GrZP)^KpcNwp zGg>G##GKkhmFUh0BeXxe9oHtB6`f@N_$D_sEI)9CncFg!4>J?89Y&^sSq`6o;@{Ik@`Nj=fNVF#TX(5j1lx6 zHufx%D%70d1N0v8BJ?D@2Px8Pry7)q1uvTp*Qdf9P(q`Rkw?|dG7W--MB2NV_n)XU zDVL~4NbUDX$|0>F-68S)$-Rg)@S$<#S4s9j#wI@K0L9n|guwUdZvnDePa*Mz(7C~! 
z2Lkv4ed!6$K8Oyc2INqUpm!T@6G(Z3_EYD$-(=XH;$FCz1PqU;p%@l4wF69RW{+Mz zc^~P#T5DEu0i^EFAK>mg+m9nG8A=8{+1wB&1bop7f+J>-~6hrtNTsx_T%QZEb`9W=H=S=lx~ST2R!^N z=7N8LF1$KjG&I){w2PpyC_!k!IF)gI%h9xhW9OcoDOsIVES>L=Opx~F=4#>-IU=Wvs-j=i!GqZF{u* z6fYe!hPOR+XPnApKxB;~- zjd{~-{O%4P-N$q53!V#YLO8I|4~?Si;b4=%1{gU9O;3D3_a2@ocERy;mvvSKLZVcs zON{DR%lD1ttmT(~k^h>7bbtd>7dUb&5VD_{S>P6~9Kv9ae$bw@|ymDEuL*Xh~V{S;7OFNZM21 z-27`oGt&*4ugC#Yxq0&zv8hdlm@CDK@G^fOwrx=PhP3H7-wU;AhmTW>7`13$CK*zP4eX(7_?R$`wt4ieGf4E9=;ONP5=TBin-fC6-l&|2#Nh ze~lV75gd*uYhQmeK|q6egE)>y_fF8j3MUg(dzELLINL|Fv+)vaU!Mye<#8A6;4<5~ zgr|+#4}YjuUgkc<<c zp0(B%eT;%D&cjQ+AP%5jN28`FlHUZ>1P{k5yEqOxmOnFDsoyHO&l$H(IwT)Gapc|= zyO7*5z|*}afe$m$YzWW;LuttGGU+PnLVI)2r`e$i^7oYX*p^b#$GebOf?3$-J9ytq zYl&aE`1IZvMbF)3ogW==ITs}_MNFe{hNenXI1Ks5O04-t<+`DK&)+Hf2Hh^bDzV4n_C7r4vqf$YT z^PA!MX^`@g9a4I$VyAqXftGdEP?duyi9Z!b_x3n7X+?HfDD zXRP#Ag^kj;>z;xnl7gT5*$K@! zbd-ltqBV+wPrg0zj$Gm%!xUrRckNP+QR)dYvnSav<8zphgglJ=vJ9I1v)%#XRo71j zf7BSG09(GzvPdFNtBex~^VM zvck2#nqKE8XFdLicv!zMaqM~?xdP9(xJ-LnfaN&12(Pt^{}Wc1OpG*)PdBeY3gRq- z5FKR`q9C-c@9Pt-V(GWpNL^@eiO%k?&QTKAFzfRTz2Co=3oi6P;5zeAVt%Ai+;k=B z=8~U*j*&3oZ<4U!bba4dxO8%H(@cM9O$5aB&tgKHgc=L>yV))V-CScIgz{bw31r7(XqAQ~p>3PN> zAO6wwvbH&Y!<7yPBJFB>YVWn|%i(+;NdcCv0#;GOi!8L_#MRqxWu+ z-{tZh7llaud6koGv%uo}E~_p+UL5&0bQ!E!{$Fts*7^X?k2TCUS6g6%w_OoIdBsVW z)&eoEJ=Jg3|9zJ{h#0{ne-P#AJO5XTJ+23n8;+|l{XRf5%9vo^W;b0atXdl)wRK|O=j z0F&@^a}4(}O{*vYG=E3$NypVA7AWg(zC>`I3CtA2u3}Fx6R10T=P}wu1+%RnRWDv|12?6G37$N$Ef8v%uOeutlmHmNDx(pulagtm%!io9RX!p zYioQPNNFK|D#~r8uqah3J-erxm*@Ok0v@O>V;XDj5jl%le08OJbyv`LZ95hscLST^ z%OKc!GLaLQaS)IlJ{DZ@&`Yvz)wWxtIz%k zico9dP)6%vHv+@jM?9Zyjp6F?S}l&qeqTVk!g~-DnoS zG=C**5gGlN^*vkvqAo)aK2ImeKR_U~anVXTOkZxI&2o%$cPo;Lo_G{ls5Kp3rjGDuEG4Qv`(Ybz~>xX zLj#%Qsyv5>Vv)Zc5@*fGAzDU%MePM8^K1=TbdM<}I^Ro#4Fi+FyR5}qS$4kEv~^EA z+>yXK;U|5*$Tq8;nm++ny0@GTVHiJaZG>4o&yXz#mV(P>B8(a_9a0D8D!U|~);}o= z%J1aAo@Z$xmMLwH%C$wD$?vI%T3&TLewlXjU*wrOCL#488_8gprgXWw$vU!_E(>%; zL2vrjsP%Z^Tb?h-C|rzwip(7XEeNj#7GJDzD}SAo-5=NRE#qYo&h65jn!F`2OcRSD 
zvV6%eL({fd^UC+2R;TYl(D+x=^4Up_F87cqMw*25L2*n(ezqm@`Jo6_z>iFqFBnvN ztY5Xn+0#qWyGvCEqi<>DCtS@MN%M{L#I~$#wsgPO++HV--V;)|N6w%_2W5m|a1J zy_{#zn0^eS7b5j24laSkZvQuKv_Ai1pgN(S@;2DhpCu6? zbQbL!ss8Bl zA>a*Yb?}^X;4J%8aBF4Y)dH{^179WziS&s-GXuF0SKcvQh1<2X@j>TLDVNEO(8|ON zcjYD2oA%1xZcYiiYWT8Bv==O$B))9?w3L~R3^EMiz9)?R(v%v%v(e=)@SNKm5I^bE z_wi&`$YY5`;Bs`xY+8tb>QkpQ?P`3@Y|Xwbz-ctbMfJ$=x|BnY`y`YG=Jizt(FQhr z(!YD>0$+t!HY9s-`j8MCcJc!4YBY;3+dNw)F@a8(>W-c79M;xP^)@q((G>Cnd#rlC zQi_R}jcr_$g^Z`Kc86{*(spJKJO*8duXTDT==OMki3{g82Z=-)tckd3sw;Q*bLh&Z zL$L3q>jh+$ZZC8@yA~!4N2biBtZ#G?7Ul$7kUf)=>*wYE1qiZ+MaVS&owt;l_92_G z5R-u|czroB{aK>Un*EcH?xc!d789>hC(Gm`t_EBR?grLm*sYkE8uL8wS=O$WsNtzl)Oe+%iLORPz#+5Cn z%eEgQS}HQ82$HOG!fp#R=_Wu&G_vA0-p?m&Qw9uAl9yL#pclcR?!0&Db~jgTs)JTuwR10xON9RZ_->$ z=Ch2f2gg~M#7xXs&J$;AFrhI$_*BiEs$5BtGd!n+JvM|qCTAh&Lde7Pb?xPscNUGm zG)>h~ox`%M?!B#}nrNvvQG4cmh$An zGw3FfaWked7P|E}98@GUOx*nU-3X8d;$($SGRsqp;$XNq?!^b34~{^QAwChaJU4>A z1B^(*G`VI%yF_z_w05kWMToGFJ8 zo4D>ylFvVW@Q6J*=0+1Q>A;LUg@J|&pg-PD@bV})PJ6S4$J_7o`RLVv${A>-sSpk$ z*mGa%D<8w5=1$!~G)rmzI!Ke){vwYuc`?p{c4rhJmT%uPpfG0qK)7NQ9aSMCl3!x> z+v?a={o1H)&1bUGxgR(bvgbjgR9+n%rl9El&|n|`)3QS7>pk?$`!>AW1|@h$OdxJL z!PyTV+n%r-6i#txe3l)X#!AS~B;*iG<^0A^XtF48pwTUNS@f$P$nLozn=kY6rF$tn zcl_~++o3aOEw^@gFUk^_|JG{V>SB<9(^MHusy>joh&OZ{M_cWvn7D(=XG7D5&}h~f^>`vv+@ zbE3zQXN)?8eTqz6HDoG_0uOp$f4<7#g$eo#=e)Zg0*eHWL?X%Z*Yx}^8NX)e)y&Yh z?Hi*g&*pIywb}QQ9NlYpV#mb_KMQPY;|!w$d@12;zY+VVM0|~jMe4#aaG*3Mq=Q=4 zGyQ; zxV_{auGcb?m+ebI{WxTU@{{TrSfar0WG)>7=`sqd+8X$am)kA~wW4C*dH4pXIPwQR z3LeVLy&@$g)57gyhRLem6fpclH4>EKZU|W>LIVs#B@!hn^RYfk-{PpJlu}x$VW#6Q z{J|*hK;-wI_+Li1-n}DH78T%CkUBnGUt+W~+8Ny(9zM0hG2Hp_%+}60x^mmz{(MYI zEbQJeuy{4Z#NzTSV|zv{dSX%Cez6kg?g`{?l87jc^?iE9;wQFZ5W;ziPpYPuhyVB} z@s*AvX}HItc3Enm^3sr8^J0{e}9mMB7;zU>x3`h3U0$<9Fj3> zKYG-=%*Zpqt%~3dEhP;h>cxhFe0c_424Ny4Bh8cNZ*z$~kr@NwOIpd(wwKsUgw@Mf z6dtP#W**bue@gwjmWTY(Qu#m5?^Nusz2QFibbrV)o&L*%8p6CXV54I8?F(I2Cu}A20km!@<5&HW;h#N@JmQgw54+7q?s*?5-8Jnpb8T&k~DTi*PGt|)EG*PeN``N 
zJ6hek=Wxf>vabpq6Qo_uRx8}0(>lf+UH>LqRCwe%O5Eh!#BRpv^1wjt;h0BD*Cmtl z+f3^LoVDT#?mYjfPO|D=l(clgG*PJB;lguJ!O4+qwbGHEOfR$Ac6-nszt3Egy*JjB zWmFqEI+T|X!WT%3n{7vLk!J=A{V8`h{0g?IPWokXiG992&cHiJ0<7Ge6n&wC0W9qO zY3sl=v@nzUIAVhJh!M|dm~nLF=0|bZpWpJ1<7_6lIfLzg**ss*7VshKB4FPfO>rgN(f1qu@G89;{tA4{_L(rGSJD>T`(W-|nd!}2pUz8w&~PD|dXtP7RG>(1 zF-tMq5EoX>SIYz`Hv?O$T~7g(9;LonsLwwWi6B34?#;VVHiM~EG|4s!uLzMY>8z~C zkN~yST1b&cz3qZamn}=I_fB;mhIKlzS}ysOo14()>#8Lzl8IhG=zZZtY_Z`Ao6YGC z83Xut{%zX9`uj#iQ1$@}PcB4vGh;0->Cy~DqO-#AcDEVNse{IV1|MSueKis-M=+8D z!SO7uo<~E7-!aVdA34rC6e=QntCq0J2@-M%uIQ9H%^-8EMlqXXZO1TTg{e#;yO1>H z=Ew_v#JDm}&`x?)N|eSBOl#i{U@^1lzTGiU6?gs=J$+0tnzZ*zpgUI zDO$ml5J~V|pcrMBW{fz5WovT*K8Ct9DXybB0JQsYe@gUSJz;cM61tp3!aw?g2?C zoCAeeIz^U46%qDMOm~k(#uP0LQxxRV1fZ^*%ETWK9`Gr_!Rg_2aE`O zT1(1jKp&#Fk_`OtR2;3lJJL0{vlGc@BK|j3Zi zQ?jqsM#x$y2c4Oqaw`RuYidx#-5roDZ$^1}AysANzYZjTT(AD+!+C z--I?3pEysjlT(h@Jm6kzaIG*n2jHfHNp%wiuNnxzEuec9>!k)fr0sZ~HEAR}b5>5L zZEH~(b+g~*fcX!6vmYw(8d5!St44;Kpd?R?xE(==xG<=DWK?hDZs~KB2ANr3?nx4j z{SNjS|2>pQCN>yK0^1!nM$$%8I6AcNAj2nm_v{J*j-W`yqr8qFC*JQCVec7C0?nv8 zL&ZCTTEDnC8aOTQPZhu`^fR<-A|zn&Ag{%reqbDztQTlFusS?_ltxRry4|#j;96XT zRkObhI&cPRlP6@0`Wq_Y2xelyHp$I9A5A7Oj&a!Gi>j}nZF0k@7hZtB-_MPe!knwM z018zUvCxWd&2{Ad+ORZ&RQpCiOan1?<1Kjawv8>md?ux9G?#D{+6prG2w3CWci0Dg z?~)7bkf*jm8#J{U7kivDZH%EGH|Q9N`PvdX*T>4es77v;cS#kjzt%u9j}}U7IgJQ0 zJI?_r19sS>l0a9fYTs|(7t>DK7D)0YmQ_f6Dvuq;43k9*eGHAN)P0ywJXMiw%|jDTecKKa45*(v+rRCM*xGRf?&GO2h%Avl z^eR!$(3*;qj`eCBvCV@YCj1dU!+Za{LGynYu_TT4*o{XMmgo8~s z_e?JQ8zB-XNVa5eq~haHO+|$#P40SXc!^pY{BA?=n2Iftdg%5ouKn#1nBM*QJwNj< zRJ9O)S!!|H?FC*mw zFQ71E@b{bvDZG{agW75a5w2k!%PwYpuw;y7p=i`%p_)M9t|oQwc{RhRYH`>LhsSRq z|LHVv^I!e<{*5f7V#=v!W{l*0zS$4)QS!fBV~b!-uKqRMxAe~^|G675d>#-DVIG~L zL)>JzKQ{WmTDS2tD-kj@aw1407N0>aMK&yjKlY`j*BigrDd*kAdPp>VT{VsvRVzGn;N_HK-XCg44t03Tb zI_u`Npx4ZdS-*Rgh3M5gll1=7<2z3l-pv1pZdk9eDdP zBoL=1n?UT(GNvGJMflhpn~$o7#Fj@j&9IS&=Hit#lV7OE&2E)<9=)G|OxWm_joS$f 
zEY-UUXpj6}W($wTv!!Z@Y+k=wua}4pRn1I)RxklmE$c;#dVg~H{ApFMqN(-L+#eI9ds+e|f2&xW{4~)9ByNj4k9fU|I48sy}C-IM)e*}?4 zO*kl=&Hc9?j48YU6Y>M{00)A8WX&wvR#kBTaawjzZn)n`?dEzsiglG$mOh>_MCy|1n5)mDW zcPLF0*%MGA7MCJ&Yw&E+zZHs z2gE}hc%>CyD+|qE|NYNhe_s>;bjZZ{X_)%IH9owFZI#Q_O8vjd95(WM*W}qwEv7F1)$rTfFdvuT`wC0ZwnzIUdbju|8i- z6)Rni^TYcC`7+^hBkzBV=L^Odfqml1+cUbP#-pqd04WpeFyC-Yd*oeS+X0m z?t&uqQ!G}38{CBYFOg+|9}ouT54Y?De;D+SOa1=nni#l18cy%AdzMJ92V;PY32{+S z?70GjLBm<@*ZZ+R{*PZjKW(75V<`N7`~TUol^6RF@J;Kt?XS;<_fP)>o;@WHciQ+5ozb*ZPtTXj3_PxmlHIMOvW%Duk#iP*Qx{wL)02~bujYa|AF;}J8j-6h;MgW#-w37ssC7T6Y>JeSep6H6 zpV+cR>>&FVtR}C51f5Ba0lp*XeSah#-pa%8`T!vI_yM(Ts5rfPQI&H+rg*YxJTD`? zo`^WEi=>Y?*emwmEMQQ2?NIC0mGySXt{ZFhu5Xg*=4Wx4m>}^(9l$m9H<`+h&$mc3 z{RNca=>jTXE*2e>{xja;dt*X2pp=1&l!{$UM0zse&n-D`hL`@#Cf^2vGrjwDv6keJ zYcHiHqUL4FZ0_mnT|f87;h)TT*%AacE{ilk$CC@=M3FQBwFUyiT7E$SwOT^!^vvli zkdq`3sLS=`8j?u6@vliP9^}(aeLJ;cMv_)KPR4ujnSd%W7d@_oQ1p3#>o+wekq(pq z;xhTsW!@xqZz?JuSHGuTSHjSON3Scu`f|6~UEg+HI-S}GYf%MGk?@gOkrK z{41Z}y4`mBxnFF0DcWvZPyGQaJY5I+P?nUxNb{DKM`yXoVSQ4ZRxe~Fxv%4aTqs;;-tf5w)%zcinQDTA(NCgV%-uU%%eC6Mmf!h~(A@wfd261sdMZ?u zo3-|lll8OOy>6RV)2ejHJyF(E=5aCHkIl4Ts9wj^a%L~K?;4ZdoikBNA~Cy`Hjs=N zLY1gg5p94kesE>Vkyrw?A#uqQ69Z`R59z2X;+rt-2>qukwoNPOEpmd2e=M z0)v77yhX|_x(SV_7Sw4%2`(AFMAO`<11ScZARK}HY2_-PC2v;+DG+kfe9`Om86re~ z=jOWH%YzN0?d9PR^wmySFEi{{?TnIIue`=9rcv64ldrSx8T zAX&Yh54ENmMBI)lgckK(a28-Q!x*hGy zsFp9T)LIc4p-^Y9-R!IPqg|olnv_%}!qR=Kqpnt`p;}M64{7b25$xaw7FP?IJ{FjTK4hm2$* zVz%|f0kpbxn6T~G!m48*xh*4UcD8R2Y+c0vtqp`^<+un>Lq6_&%Q-V6BS2Ox@~ zZ1f;zAokLMksfKBx+)*c^gN=stbN$eh4P^Gz+{|(e3rzj_{;gJ!jqLH256uIkGPJ=uXx@=vLS$VG&7q9PrQOL&B%e<{+BwWXAh##w0_**c8LTz- zU(!?wm7~UQH;zEzM=^GaJWc-73W1#T)y}X?butq$6fA`(QgB4$E2kFKHs1KF z^I$nBaaf8Z4ZZx*m@kx)c^8+vpYT05u~vU&SPt%*YBJmKj0|HlM$h9+rO3I2`?~`H z1xF+}2|2v;i?8AamB+rg7W7-W+m7!n`2%eXL8XQqvn}+6hC=ZdpN#t%5K<79SxF?4 zqDCRB^|O}ykZQ&H)-rs#X*SGmJUGy`s%NW)utEkSeUE_Lq6BCBEZC=cMnHgHm-c#? 
z>X__v_vmg-ryUyu^y-BCR(zVRsKZLB6@-t=0}*PFx@wMeqc$WFK9vnWBn*eWj_kf- z6bG29fMz3o@Yo{46D8$BU_@};iw*V<-1aJ;&tqBv_26jFl2r+LW?d47u6$bKiY|Ai8EYf7OhKv^j|vr9XUm&v;*8L^kr-7Q=82Cz+zWG3k4Hw_?0asA8DCbtg~F3BoW155Hg0u+I)Bneh8edlyLpMh{0!K| zzg=WctjpmGEOz37nTzmuE|WkU$2kes^4itV_UIPF)!2++6c8r#Z%@zs7{Skk^bsK( z;vpBab|(YUo)?A#!O=Yz^evv*iVKa%mE2~sE$i5`XF??Tgjh~J%Sz{-55SBrjbUVG zoAcyWr7bdS<>x$x0u_wWH?@^)>z?~Pe}5MH8fO|$%dPm`emeq9`IkmC&O>v|ghhPt zPJ#?(Pa%%(ZgQ)0H*W|`z4shF2+s)bzWka0E(m8TbRpGC;tyn%d@~978F&Z_Rt;LA zdNG|6@Lrw?+;O<+xaD%xENWtpSm10ABQl$ew(KF^k68T>NSgxW zm2ybag93vXI+^oK!hi(h>PK){pr)!dh0;`t3-L7sV+KERrww34RztT&76+yqzYuxW z*6O)vGq+;CT+yFCBR>irgtU8IMVB(SXjS#uT7o_4MRGE)Mf)}Qf3IG&Y=%Q(uWDZf zwBXi*P9l8f+O!cuDMt{#ZT{4U9K~*B6TsB&0-D~8;i1cv(1iMp(Jv?4wtBb0DqWIj zlXxNUowCIM_qMQ3@MDh~tPQ+X39#sJQW;vUx?OksBM(d>88)4w%Ei4=bx3ho?N4-w zHOLeAeO6@MIKA@<3v!|Y!M zb_Fq!t$cX3Kebtu?zzqGjs9f=8FxHL^la+S#GMQRSZsadg6P3?#M6ODbQ~u8ZC8AR zkL*tj5?;k99bIYn{(T*N0$r42L*H8ToYGugqGrm;lU&j8QD#b>YbzmD8nA9Mfh`uh zJ$8AMQmfFrEC_gP5IQ0g+fSQyYv+RuR0+4JpRlmT@zT9s?oFCiOnYsa!whU| zRn?KCGSwX$`AR~D4$oT184?`V!$Xzn^uRZ`5y5^mNC!zh9@lHh)}^!eZ(->otwS;W z(Z%4~g4lO#Yb#dzMEiKAIHEc$mkq#TC`k;#<2&iacg|>Zi|3vgd_s~b)wP{)bp5=Jm)A(ySCnk}Ja~@k&Fa1R5sKL*~ca?P-+*wt`{+?d^4FD++0YGNBRE8$eZ_wse9bi^59fg7c z){nmcgHD6}nF`2;<>IYJruV34Xh+{eu|Zb(88-D_IQ8Jsv*NVjnCKhSz_Sa5kFZXl z7D~XSs7j+O{$iej5y$RkS$G>HT5GP!pzu}m5A4JPGD32eRPInMUe{7a9JSo$xig&1 zzXH)bj}U}Hl(~N}Qye%fABWEk8misP;OqO5te^%-CK}=i%tuQh&QV4?H64!(I;P zUt383yl(s(NT139m6us!M+H zC8q~iME`RY@Ei;@Zy=u%9A$g*ACRvQz^CLqS|t7}wDtMS3jhoi+n`6C6}>l!|AeDI z5fu=^L;Cm5=EoOG{CEFp9LEB~1*TnRzlb9JnopXFSu6|1PjrFhwu$&@<8J!dnlHSg zXt7&P@*9-Y0Px(NM(giw-Y^M-)Be?z8=u|gQ76rl(p^s=>t#WSS!q#G*JR2UGmzI< zYPGtO-w)zx5fRay!6e=c$ZM~SX63ESR9bBso6pf) zqu+IbuTF^5H0GjPm0#V~0L%2r`bu;x=NU!i4@w07SNYK5 zORE3?GRX)!I#aA%zaRZA91N4R^E zAU;F@HLL3E!uPsUSj^|>tY@q6cqS59qFa|1ecUpU8sRgI53w5S;hpp~%)$ZK_-0No zQuXjS#9lsMVn4)j;nxdE1ptWOvb;M57BI$G?;m^Xvg)$Ip;!kd@f&mufi#lSbmDYw zIbiy$WR2_Fv5#kWUsBXg!effLwOUI6WSCpe?qBZwub-a**y;)MUw`=5PbL9ifFUq9 
zjQJl^d9(0;P&*lw3-*70{_C%AKgSUO7YL|G7X8~J{@;sq22TIyDgVBehyM*j>oO$y zkJbFITm6aKLH~>y|LP_16Ar+6O(9d0|8(u&5B>i3fgk_(^?#niB%rNd+Ne~bB5Ni$ z^;f9. All -complaints will be reviewed and investigated and will result in a response that -is deemed necessary and appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an incident. -Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html - -[homepage]: https://www.contributor-covenant.org - -For answers to common questions about this code of conduct, see -https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index fdc1528..0000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,31 +0,0 @@ -# Contributing to csprng -We want to make contributing to this project as easy and transparent as -possible. - -## Pull Requests -We actively welcome your pull requests. - -1. Fork the repo and create your branch from `master`. -2. If you've added code that should be tested, add tests. -3. If you've changed APIs, update the documentation. -4. Ensure the test suite passes. -5. Make sure your code lints. -6. If you haven't already, complete the Contributor License Agreement ("CLA"). - -## Contributor License Agreement ("CLA") -In order to accept your pull request, we need you to submit a CLA. You only need -to do this once to work on any of Facebook's open source projects. - -Complete your CLA here: - -## Issues -We use GitHub issues to track public bugs. 
Please ensure your description is -clear and has sufficient instructions to be able to reproduce the issue. - -Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe -disclosure of security bugs. In those cases, please go through the process -outlined on that page and do not file a public issue. - -## License -By contributing to csprng, you agree that your contributions will be licensed -under the LICENSE file in the root directory of this source tree. \ No newline at end of file diff --git a/README.md b/README.md index 128e327..c71f0a8 100644 --- a/README.md +++ b/README.md @@ -4,14 +4,14 @@ torchcsprng is a [PyTorch C++/CUDA extension](https://pytorch.org/tutorials/advanced/cpp_extension.html) that provides: -- [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) 128-bit encryption/decryption in two modes: [ECB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) and [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/csprng/blob/master/examples/encrypt_decrypt.ipynb) +- [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) 128-bit encryption/decryption in two modes: [ECB]() and [CTR]() [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/csprng/blob/master/examples/encrypt_decrypt.ipynb) - [cryptographically secure pseudorandom number generators](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) for PyTorch. 
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/csprng/blob/master/examples/csprng.ipynb) ## Design torchcsprng generates a random 128-bit key on CPU using one of its generators and runs -[AES128](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) in [CTR mode](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) - either on CPU or on GPU using CUDA to generate a random 128 bit state and apply a transformation function to map it to target tensor values. +[AES128](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) in [CTR mode]() +either on CPU or on GPU using CUDA to generate a random 128 bit state and apply a transformation function to map it to target tensor values. This approach is based on [Parallel Random Numbers: As Easy as 1, 2, 3(John K. Salmon, Mark A. Moraes, Ron O. Dror, and David E. Shaw, D. E. Shaw Research)](http://www.thesalmons.org/john/random123/papers/random123sc11.pdf). It makes torchcsprng both crypto-secure and parallel on CUDA and CPU. @@ -25,96 +25,95 @@ Advantages: ## Features -torchcsprng 0.2.0 exposes new API for tensor encryption/decryption. Tensor encryption/decryption API is dtype agnostic, so a tensor of any dtype can be encrypted and the result can be stored to a tensor of any dtype. An encryption key also can be a tensor of any dtype. Currently torchcsprng supports [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) cipher with 128-bit key in two modes: [ECB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) and [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)). +torchcsprng 0.2.0 exposes new API for tensor encryption/decryption. Tensor encryption/decryption API is dtype agnostic, so a tensor of any dtype can be encrypted and the result can be stored to a tensor of any dtype. An encryption key also can be a tensor of any dtype. 
Currently torchcsprng supports [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) cipher with 128-bit key in two modes: [ECB]() and [CTR](). -* `torchcsprng.encrypt(input: Tensor, output: Tensor, key: Tensor, cipher: string, mode: string)` +- `torchcsprng.encrypt(input: Tensor, output: Tensor, key: Tensor, cipher: string, mode: string)` > - `input` tensor can be any CPU or CUDA tensor of any dtype and size in bytes(zero-padding is used to make its size in bytes divisible by block size in bytes) > - `output` tensor can have any dtype and the same device as `input` tensor and the size in bytes rounded up to the block size in bytes(16 bytes for AES 128) > - `key` tensor can have any dtype and the same device as `input` tensor and size in bytes equal to 16 for AES 128 > - `cipher` currently can be only one supported value `"aes128"` -> - `mode` currently can be either [`"ecb"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) or [`"ctr"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) +> - `mode` currently can be either [`"ecb"`]() or [`"ctr"`]() -* `torchcsprng.decrypt(input: Tensor, output: Tensor, key: Tensor, cipher: string, mode: string)` +- `torchcsprng.decrypt(input: Tensor, output: Tensor, key: Tensor, cipher: string, mode: string)` > - `input` tensor can be any CPU or CUDA tensor of any dtype with size in bytes divisible by the block size in bytes(16 bytes for AES 128) > - `output` tensor can have any dtype but the same device as `input` tensor and the same size in bytes as `input` tensor > - `key` tensor can have any dtype and the same device as `input` tensor and size in bytes equal to 16 for AES 128 > - `cipher` currently can be only one supported value `"aes128"` -> - `mode` currently can be either [`"ecb"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) or 
[`"ctr"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) +> - `mode` currently can be either [`"ecb"`]() or [`"ctr"`]() torchcsprng exposes two methods to create crypto-secure and non-crypto-secure PRNGs: -| Method to create PRNG | Is crypto-secure? | Has seed? | Underlying implementation | -|----------------------------------------------------|-------------------|-----------|---------------------------| -| create_random_device_generator(token: string=None) | yes | no | See [std::random_device](https://en.cppreference.com/w/cpp/numeric/random/random_device) and [its constructor](https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device). The implementation in libstdc++ expects token to name the source of random bytes. Possible token values include "default", "rand_s", "rdseed", "rdrand", "rdrnd", "/dev/urandom", "/dev/random", "mt19937", and integer string specifying the seed of the mt19937 engine. (Token values other than "default" are only valid for certain targets.) If token=None then constructs a new std::random_device object with an implementation-defined token. | -| create_mt19937_generator(seed: int=None) | no | yes | See [std::mt19937](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine) and [its constructor](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine/mersenne_twister_engine). Constructs a mersenne_twister_engine object, and initializes its internal state sequence to pseudo-random values. If seed=None then seeds the engine with default_seed.| +| Method to create PRNG | Is crypto-secure? | Has seed? 
| Underlying implementation | +| -------------------------------------------------- | ----------------- | --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| create_random_device_generator(token: string=None) | yes | no | See [std::random_device](https://en.cppreference.com/w/cpp/numeric/random/random_device) and [its constructor](https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device). The implementation in libstdc++ expects token to name the source of random bytes. Possible token values include "default", "rand_s", "rdseed", "rdrand", "rdrnd", "/dev/urandom", "/dev/random", "mt19937", and integer string specifying the seed of the mt19937 engine. (Token values other than "default" are only valid for certain targets.) If token=None then constructs a new std::random_device object with an implementation-defined token. | +| create_mt19937_generator(seed: int=None) | no | yes | See [std::mt19937](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine) and [its constructor](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine/mersenne_twister_engine). Constructs a mersenne_twister_engine object, and initializes its internal state sequence to pseudo-random values. If seed=None then seeds the engine with default_seed. 
| The following list of methods supports all forementioned PRNGs: -| Kernel | CUDA | CPU | -|------------------------|------|-----| -| random_() | yes | yes | -| random_(to) | yes | yes | -| random_(from, to) | yes | yes | -| uniform_(from, to) | yes | yes | -| normal_(mean, std) | yes | yes | -| cauchy_(median, sigma) | yes | yes | -| log_normal_(mean, std) | yes | yes | -| geometric_(p) | yes | yes | -| exponential_(lambda) | yes | yes | -| randperm(n) | yes* | yes | - -* the calculations are done on CPU and the result is copied to CUDA +| Kernel | CUDA | CPU | +| ----------------------- | ----- | --- | +| random\_() | yes | yes | +| random\_(to) | yes | yes | +| random\_(from, to) | yes | yes | +| uniform\_(from, to) | yes | yes | +| normal\_(mean, std) | yes | yes | +| cauchy\_(median, sigma) | yes | yes | +| log*normal*(mean, std) | yes | yes | +| geometric\_(p) | yes | yes | +| exponential\_(lambda) | yes | yes | +| randperm(n) | yes\* | yes | + +- the calculations are done on CPU and the result is copied to CUDA ## Installation CSPRNG works with Python 3.6-3.9 on the following operating systems and can be used with PyTorch tensors on the following devices: -| Tensor Device Type | Linux | macOS | MS Window | -|--------------------|-----------|---------------|----------------| -| CPU | Supported | Supported | Supported | +| Tensor Device Type | Linux | macOS | MS Window | +| ------------------ | --------- | ------------- | --------------------- | +| CPU | Supported | Supported | Supported | | CUDA | Supported | Not Supported | Supported since 0.2.0 | The following is the corresponding CSPRNG versions and supported Python versions. 
-| PyTorch | CSPRNG | Python | CUDA | -|---------|--------|----------|------------------| -| 1.8.0 | 0.2.0 | 3.7-3.9 | 10.1, 10.2, 11.1 | -| 1.7.1 | 0.1.4 | 3.6-3.8 | 9.2, 10.1, 10.2 | -| 1.7.0 | 0.1.3 | 3.6-3.8 | 9.2, 10.1, 10.2 | -| 1.6.0 | 0.1.2 | 3.6-3.8 | 9.2, 10.1, 10.2 | - +| PyTorch | CSPRNG | Python | CUDA | +| ------- | ------ | ------- | ---------------- | +| 1.8.0 | 0.2.0 | 3.7-3.9 | 10.1, 10.2, 11.1 | +| 1.7.1 | 0.1.4 | 3.6-3.8 | 9.2, 10.1, 10.2 | +| 1.7.0 | 0.1.3 | 3.6-3.8 | 9.2, 10.1, 10.2 | +| 1.6.0 | 0.1.2 | 3.6-3.8 | 9.2, 10.1, 10.2 | ### Binary Installation Anaconda: | OS | CUDA | | -|---------------|------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ------------- | ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Linux/Windows | 10.1

10.2

11.1

None | conda install torchcsprng cudatoolkit=10.1 -c pytorch -c conda-forge

conda install torchcsprng cudatoolkit=10.2 -c pytorch -c conda-forge

conda install torchcsprng cudatoolkit=11.1 -c pytorch -c conda-forge

conda install torchcsprng cpuonly -c pytorch -c conda-forge | -| macOS | None | conda install torchcsprng -c pytorch | +| macOS | None | conda install torchcsprng -c pytorch | pip: -| OS | CUDA | | -|---------------|------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| OS | CUDA | | +| ------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Linux/Windows | 10.1

10.2

11.1

None | pip install torchcsprng==0.2.0+cu101 torch==1.8.0+cu101 -f https://download.pytorch.org/whl/cu101/torch_stable.html

pip install torchcsprng==0.2.0 torch==1.8.0 -f https://download.pytorch.org/whl/cu102/torch_stable.html

pip install torchcsprng==0.2.0+cu111 torch==1.8.0+cu111 -f https://download.pytorch.org/whl/cu111/torch_stable.html

pip install torchcsprng==0.2.0+cpu torch==1.8.0+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html | -| macOS | None | pip install torchcsprng torch | +| macOS | None | pip install torchcsprng torch | ### Nightly builds: Anaconda: | OS | CUDA | | -|---------------|------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Linux/Windows | 10.1

10.2

11.1

None | conda install torchcsprng cudatoolkit=10.1 -c pytorch-nightly -c conda-forge

conda install torchcsprng cudatoolkit=10.2 -c pytorch-nightly -c conda-forge

conda install torchcsprng cudatoolkit=11.1 -c pytorch-nightly -c conda-forge

conda install torchcsprng cpuonly -c pytorch-nightly -c conda-forge | | macOS | None | conda install torchcsprng -c pytorch-nightly | pip: | OS | CUDA | | -|---------------|------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | Linux/Windows | 10.1

10.2

11.1

None | pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html

pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html

pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cu111/torch_nightly.html

pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html | | macOS | None | pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html | @@ -122,46 +121,57 @@ pip: torchcsprng is a Python C++/CUDA extension that depends on PyTorch. In order to build CSPRNG from source it is required to have Python(>=3.7) with PyTorch(>=1.8.0) installed and C++ compiler(gcc/clang for Linux, XCode for macOS, Visual Studio for MS Windows). To build torchcsprng you can run the following: + ```console python setup.py install ``` + By default, GPU support is built if CUDA is found and torch.cuda.is_available() is True. Additionally, it is possible to force building GPU support by setting the FORCE_CUDA=1 environment variable, which is useful when building a docker image. ## Getting Started The torchcsprng API is available in `torchcsprng` module: + ```python import torch import torchcsprng as csprng ``` + Create crypto-secure PRNG from /dev/urandom: + ```python urandom_gen = csprng.create_random_device_generator('/dev/urandom') ``` Create empty boolean tensor on CUDA and initialize it with random values from urandom_gen: + ```python torch.empty(10, dtype=torch.bool, device='cuda').random_(generator=urandom_gen) ``` + ``` tensor([ True, False, False, True, False, False, False, True, False, False], device='cuda:0') ``` Create empty int16 tensor on CUDA and initialize it with random values in range [0, 100) from urandom_gen: + ```python torch.empty(10, dtype=torch.int16, device='cuda').random_(100, generator=urandom_gen) ``` + ``` tensor([59, 20, 68, 51, 18, 37, 7, 54, 74, 85], device='cuda:0', dtype=torch.int16) ``` Create non-crypto-secure MT19937 PRNG: + ```python mt19937_gen = csprng.create_mt19937_generator() torch.empty(10, dtype=torch.int64, device='cuda').random_(torch.iinfo(torch.int64).min, to=None, generator=mt19937_gen) ``` + ``` tensor([-7584783661268263470, 2477984957619728163, -3472586837228887516, 
-5174704429717287072, 4125764479102447192, -4763846282056057972, @@ -170,37 +180,42 @@ tensor([-7584783661268263470, 2477984957619728163, -3472586837228887516, ``` Create crypto-secure PRNG from default random device: + ```python default_device_gen = csprng.create_random_device_generator() torch.randn(10, device='cuda', generator=default_device_gen) ``` + ``` tensor([ 1.2885, 0.3240, -1.1813, 0.8629, 0.5714, 2.3720, -0.5627, -0.5551, -0.6304, 0.1090], device='cuda:0') ``` Create non-crypto-secure MT19937 PRNG with seed: + ```python mt19937_gen = csprng.create_mt19937_generator(42) torch.empty(10, device='cuda').geometric_(p=0.2, generator=mt19937_gen) ``` + ``` tensor([ 7., 1., 8., 1., 11., 3., 1., 1., 5., 10.], device='cuda:0') ``` Recreate MT19937 PRNG with the same seed: + ```python mt19937_gen = csprng.create_mt19937_generator(42) torch.empty(10, device='cuda').geometric_(p=0.2, generator=mt19937_gen) ``` + ``` tensor([ 7., 1., 8., 1., 11., 3., 1., 1., 5., 10.], device='cuda:0') ``` ## Contributing -We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion. If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. - +We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion. If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. 
## License diff --git a/examples/csprng.ipynb b/examples/csprng.ipynb deleted file mode 100644 index 1f6b477..0000000 --- a/examples/csprng.ipynb +++ /dev/null @@ -1,226 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "csprng.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Lpno_zUJT8ms" - }, - "source": [ - "# Cryptographically secure pseudorandom number generators for PyTorch\n", - "\n", - "The torchcsprng API is available in `torchcsprng` module:\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "db4YYky-PDI_" - }, - "source": [ - "!pip install torchcsprng==0.2.0 torch==1.8.0 -f https://download.pytorch.org/whl/cu101/torch_stable.html" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "O1s_j8CPPHSn" - }, - "source": [ - "import torch\n", - "import torchcsprng as csprng" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "o1Kz25IoS9m-" - }, - "source": [ - "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HLlLxkDIUWCG" - }, - "source": [ - "Create crypto-secure PRNG from /dev/urandom:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "yyyYlq5kUQss" - }, - "source": [ - "urandom_gen = csprng.create_random_device_generator('/dev/urandom')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xbUCnJfkUdUI" - }, - "source": [ - "Create empty boolean tensor on the `device` and initialize it with random values from `urandom_gen`:\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "zmj_VlIzUYIO" - }, - "source": [ - "torch.empty(10, dtype=torch.bool, 
device=device).random_(generator=urandom_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ycODsYhtUud9" - }, - "source": [ - "Create empty int16 tensor on the `device` and initialize it with random values in range [0, 100) from `urandom_gen`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "uel-jbW9UlZH" - }, - "source": [ - "torch.empty(10, dtype=torch.int16, device=device).random_(100, generator=urandom_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1jXW1FEmVMW_" - }, - "source": [ - "Create non-crypto-secure MT19937 PRNG:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "sL-cwFGfVOrp" - }, - "source": [ - "mt19937_gen = csprng.create_mt19937_generator()\n", - "torch.empty(10, dtype=torch.int64, device=device).random_(torch.iinfo(torch.int64).min, to=None, generator=mt19937_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KW96wT4UVXBm" - }, - "source": [ - "Create crypto-secure PRNG from default random device:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "tjwbuE6FVRgm" - }, - "source": [ - "default_device_gen = csprng.create_random_device_generator()\n", - "torch.randn(10, device=device, generator=default_device_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qYgdkZAYVfZT" - }, - "source": [ - "Create non-crypto-secure MT19937 PRNG with seed:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "xjOsYOxxVbzg" - }, - "source": [ - "mt19937_gen = csprng.create_mt19937_generator(42)\n", - "first = torch.empty(10, device=device).geometric_(p=0.2, generator=mt19937_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cV77v7tHVlRd" - }, - "source": [ - "Recreate MT19937 PRNG with the same seed:" - 
] - }, - { - "cell_type": "code", - "metadata": { - "id": "i0O2lC0hVjAg" - }, - "source": [ - "mt19937_gen = csprng.create_mt19937_generator(42)\n", - "second = torch.empty(10, device=device).geometric_(p=0.2, generator=mt19937_gen)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OcgSK0mejcef" - }, - "source": [ - "Check that `first` equals to `second`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "vMx1BRO3jh7L" - }, - "source": [ - "assert (first == second).all()" - ], - "execution_count": null, - "outputs": [] - } - ] -} diff --git a/examples/encrypt_decrypt.ipynb b/examples/encrypt_decrypt.ipynb deleted file mode 100644 index 3de8968..0000000 --- a/examples/encrypt_decrypt.ipynb +++ /dev/null @@ -1,307 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "encrypt_decrypt.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "4JG-7IJgz_dK" - }, - "source": [ - "# PyTorch/CSPRNG encrypt/decrypt examples" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H8TZemj30JvQ" - }, - "source": [ - "torchcsprng 0.2.0 exposes new API for tensor encryption/decryption. Tensor encryption/decryption API is dtype agnostic, so a tensor of any dtype can be encrypted and the result can be stored to a tensor of any dtype. An encryption key also can be a tensor of any dtype. Currently torchcsprng supports AES cipher with 128-bit key in two modes: ECB and CTR." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "jC1O-C25vI0W" - }, - "source": [ - "!pip install torchcsprng==0.2.0 torch==1.8.0 -f https://download.pytorch.org/whl/cu101/torch_stable.html" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "su2RWWdOrWFU" - }, - "source": [ - "import torch\n", - "import torchcsprng as csprng" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "NHTOLPZ_3254" - }, - "source": [ - "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "17L0sgmy0R6o" - }, - "source": [ - "torchcsprng implementation of AES with 128 bit key requires to have a key tensor of 16 bytes but of any dtype" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "rw7WYZ-50To9" - }, - "source": [ - "key = torch.empty(16, dtype=torch.uint8, device=device).random_(0, 256)\n", - "key" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RRfvyfHM4MY1" - }, - "source": [ - "Alternatively it can be a tensor of 8 elements of `torch.int16` or even 4 elements of `torch.float32`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rCy01t1-0dtO" - }, - "source": [ - "The size of input tensor is 42 * (32/8) = 168 bytes. 
AES 128 operates with 16-bytes blocks, so zero-padding of 8 bytes will be used to form 176 bytes(eleven 16-bytes blocks)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "LcuVmhyU0WTn" - }, - "source": [ - "initial = torch.empty(42, dtype=torch.float32, device=device).normal_(-24.0, 42.0)\n", - "initial" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rPNq2u4e3tlJ" - }, - "source": [ - "torchcsprng requires output tensor to be of the same size in bytes as input tensor rounded up to 16 bytes(AES 128 block size), so if `torch.int64` is dtype of the destination tensor size must be 176 / (64/8) = 22" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "RAJya9GT0gb4" - }, - "source": [ - "encrypted = torch.empty(22, dtype=torch.int64, device=device)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-DCI4QOh4oGX" - }, - "source": [ - "Call `torchcsprng.encrypt` to encrypt `initial` tensor in [ECB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) mode with 128-bit `key` tensor and store the result to `encrypted` tensor." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "TK4OjPRq4lsJ" - }, - "source": [ - "csprng.encrypt(initial, encrypted, key, \"aes128\", \"ecb\")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yXUAwFHh5PSy" - }, - "source": [ - "Create an output tensor" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "4LtJ-kD446DJ" - }, - "source": [ - "decrypted = torch.empty_like(initial)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8VcF04mf6Rn5" - }, - "source": [ - "Call `torchcsprng.decrypt` to decrypt `encrypted` tensor in ECB mode with 128-bit `key` tensor and store the result to `decrypted` tensor." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "kojXCFGK5v6l" - }, - "source": [ - "csprng.decrypt(encrypted, decrypted, key, \"aes128\", \"ecb\")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9dEBSPD6EFSu" - }, - "source": [ - "Let's check that `decrypted` equals to `initial`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "yOc1ftnM5yyj" - }, - "source": [ - "assert (decrypted == initial).all()" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cQWyteLlE4mQ" - }, - "source": [ - "Another example is to use [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) mode with 128-bit `key` tensor of 4 elements of dtype `dtype=torch.float32`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ZFInqYawD7ks" - }, - "source": [ - "key = torch.empty(4, dtype=torch.float32, device=device).random_()\n", - "key" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FRz94NaZGyRS" - }, - "source": [ - "Let's encrypt 100 elements `torch.bool` tensor and store the result in 56 elements `torch.int16` tensor:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "8uiqxiehF_is" - }, - "source": [ - "initial = torch.empty(100, dtype=torch.bool, device=device).random_()\n", - "initial" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "G0URlmQYGfcW" - }, - "source": [ - "encrypted = torch.empty(56, dtype=torch.int16, device=device)\n", - "csprng.encrypt(initial, encrypted, key, \"aes128\", \"ctr\")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U9Zz2oXoHw9Q" - }, - "source": [ - "Decrypt it back and check that `decrypted` equals to `initial`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "YXNcdUbXHoPC" - }, - 
"source": [ - "decrypted = torch.empty_like(initial)\n", - "csprng.decrypt(encrypted, decrypted, key, \"aes128\", \"ctr\")\n", - "decrypted" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ie7epw1SKrdQ" - }, - "source": [ - "assert (decrypted == initial).all()" - ], - "execution_count": null, - "outputs": [] - } - ] -} diff --git a/packaging/README.md b/packaging/README.md deleted file mode 100644 index 20ff064..0000000 --- a/packaging/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# Building torchcsprng packages for release - -## Anaconda packages - -### Linux - -```bash -nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/conda-cuda bash -pushd remote/conda - -./build_csprng.sh 9.0 -./build_csprng.sh 10.0 -./build_csprng.sh cpu - -# copy packages over to /remote -# exit docker -# anaconda upload -u pytorch torchcsprng*.bz2 -``` - -### OSX - -```bash -# create a fresh anaconda environment / install and activate it -conda install -y conda-build anaconda-client -./build_csprng.sh cpu - -# copy packages over to /remote -# exit docker -# anaconda upload -u pytorch torchcsprng*.bz2 -``` - -### Windows - -```bash -# Open `Git Bash` and change dir to `conda` -./build_csprng.sh 9.0 -./build_csprng.sh 10.0 -./build_csprng.sh cpu - -# copy packages to a output directory -# anaconda upload -u pytorch torchcsprng*.bz2 -``` - -## Wheels - -### Linux - -pushd wheel - -```bash -nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/manylinux-cuda90:latest bash -cd remote -./linux_manywheel.sh cu90 - -rm -rf /usr/local/cuda* -./linux_manywheel.sh cpu -``` - -```bash -nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/manylinux-cuda100:latest bash -cd remote -./linux_manywheel.sh cu100 -``` - -wheels are in the folders `cpu`, `cu90`, `cu100`. - -You can upload the `cu90` wheels to twine with `twine upload *.whl`. 
-Which wheels we upload depends on which wheels PyTorch uploads as default, and right now, it's `cu90`. - -### OSX - -```bash -pushd wheel -./osx_wheel.sh -``` - -### Windows - -```cmd -set PYTORCH_REPO=pytorch - -pushd windows -call build_csprng.bat 90 0.3.0 1 -call build_csprng.bat 100 0.3.0 1 -call build_csprng.bat cpu 0.3.0 1 -``` - -wheels are in the current folder. - -You can upload them to twine with `twine upload *.whl` diff --git a/packaging/build_conda.sh b/packaging/build_conda.sh deleted file mode 100755 index e0e096d..0000000 --- a/packaging/build_conda.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -ex - -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -. "$script_dir/pkg_helpers.bash" - -export BUILD_TYPE=conda -setup_env $(cat "version.txt" | sed "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/g") -export SOURCE_ROOT_DIR="$PWD" -setup_conda_pytorch_constraint -setup_conda_cudatoolkit_constraint -setup_visual_studio_constraint -setup_junit_results_folder -conda build $CONDA_CHANNEL_FLAGS -c defaults -c conda-forge --no-anaconda-upload --python "$PYTHON_VERSION" packaging/torchcsprng diff --git a/packaging/build_wheel.sh b/packaging/build_wheel.sh deleted file mode 100755 index 15b85a4..0000000 --- a/packaging/build_wheel.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -set -ex - -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -. 
"$script_dir/pkg_helpers.bash" - -export BUILD_TYPE=wheel -setup_env $(cat "version.txt" | sed "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/g") -setup_wheel_python -pip_install numpy pyyaml future ninja -setup_pip_pytorch_version -python setup.py clean - -# Copy binaries to be included in the wheel distribution -if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then - python_exec="$(which python)" - bin_path=$(dirname $python_exec) - env_path=$(dirname $bin_path) - if [[ "$(uname)" == Darwin ]]; then - # Install delocate to relocate the required binaries - pip_install delocate - fi -else - # Install auditwheel to get some inspection utilities - pip_install auditwheel - - # Point to custom libraries - export LD_LIBRARY_PATH=$(pwd)/ext_libraries/lib:$LD_LIBRARY_PATH - export TORCHCSPRNG_INCLUDE=$(pwd)/ext_libraries/include - export TORCHCSPRNG_LIBRARY=$(pwd)/ext_libraries/lib -fi - -if [[ "$OSTYPE" == "msys" ]]; then - IS_WHEEL=1 "$script_dir/windows/internal/vc_env_helper.bat" python setup.py bdist_wheel -else - IS_WHEEL=1 python setup.py bdist_wheel -fi - - -if [[ "$(uname)" == Darwin ]]; then - pushd dist/ - python_exec="$(which python)" - bin_path=$(dirname $python_exec) - env_path=$(dirname $bin_path) - for whl in *.whl; do - DYLD_LIBRARY_PATH="$env_path/lib/:$DYLD_LIBRARY_PATH" delocate-wheel -v $whl - done -else - if [[ "$OSTYPE" == "msys" ]]; then - "$script_dir/windows/internal/vc_env_helper.bat" python $script_dir/wheel/relocate.py - else - LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH" python $script_dir/wheel/relocate.py - fi -fi diff --git a/packaging/conda/build_csprng.sh b/packaging/conda/build_csprng.sh deleted file mode 100755 index 44fc0af..0000000 --- a/packaging/conda/build_csprng.sh +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env bash -if [[ -x "/remote/anaconda_token" ]]; then - . 
/remote/anaconda_token || true -fi - -set -ex - -if [[ "$CIRCLECI" == 'true' ]]; then - export PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin:.:$PATH" -fi - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - -# Parse arguments and determmine version -########################################################### -if [[ -n "$DESIRED_CUDA" && -n "$TORCHCSPRNG_BUILD_VERSION" && -n "$TORCHCSPRNG_BUILD_NUMBER" ]]; then - desired_cuda="$DESIRED_CUDA" - build_version="$PYTORCH_BUILD_VERSION" - build_number="$PYTORCH_BUILD_NUMBER" -else - if [ "$#" -ne 3 ]; then - echo "Illegal number of parameters. Pass cuda version, pytorch version, build number" - echo "CUDA version should be Mm with no dot, e.g. '80'" - echo "DESIRED_PYTHON should be M.m, e.g. '2.7'" - exit 1 - fi - - desired_cuda="$1" - build_version="$2" - build_number="$3" -fi -if [[ "$desired_cuda" != cpu ]]; then - desired_cuda="$(echo $desired_cuda | tr -d cuda. 
)" -fi -echo "Building cuda version $desired_cuda and torchcsprng version: $build_version build_number: $build_number" - -if [[ "$desired_cuda" == 'cpu' ]]; then - cpu_only=1 - cuver="cpu" -else - # Switch desired_cuda to be M.m to be consistent with other scripts in - # pytorch/builder - export FORCE_CUDA=1 - cuda_nodot="$desired_cuda" - - if [[ ${#cuda_nodot} -eq 2 ]]; then - desired_cuda="${desired_cuda:0:1}.${desired_cuda:1:1}" - elif [[ ${#cuda_nodot} -eq 3 ]]; then - desired_cuda="${desired_cuda:0:2}.${desired_cuda:2:1}" - else - echo "unknown cuda version $cuda_nodot" - exit 1 - fi - - cuver="cu$cuda_nodot" -fi - -export TORCHCSPRNG_BUILD_VERSION=$build_version -export TORCHCSPRNG_BUILD_NUMBER=$build_number - -if [[ -z "$DESIRED_PYTHON" ]]; then - DESIRED_PYTHON=('3.5' '3.6' '3.7') -fi - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" - -if [[ -z "$WIN_PACKAGE_WORK_DIR" ]]; then - WIN_PACKAGE_WORK_DIR="$(echo $(pwd -W) | tr '/' '\\')\\tmp_conda_$(date +%H%M%S)" -fi - -mkdir -p "$WIN_PACKAGE_WORK_DIR" || true -csprng_rootdir="$(realpath ${WIN_PACKAGE_WORK_DIR})/torchcsprng-src" -git config --system core.longpaths true - -if [[ ! 
-d "$csprng_rootdir" ]]; then - rm -rf "$csprng_rootdir" - git clone "https://github.com/pytorch/csprng" "$csprng_rootdir" - pushd "$csprng_rootdir" - git checkout $PYTORCH_BRANCH - popd -fi - -cd "$SOURCE_DIR" - -export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda" -export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe" -rm -rf "$tmp_conda" -rm -f "$miniconda_exe" -curl -sSk https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "$miniconda_exe" -"$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe" -pushd $tmp_conda -export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH" -popd -retry conda install -yq conda-build - -ANACONDA_USER=pytorch-nightly -conda config --set anaconda_upload no - - -export TORCHCSPRNG_PACKAGE_SUFFIX="" -if [[ "$desired_cuda" == 'cpu' ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CONDA_CPUONLY_FEATURE="- cpuonly # [not osx]" - export CUDA_VERSION="None" -else - export CONDA_CPUONLY_FEATURE="" - . 
./switch_cuda_version.sh $desired_cuda - if [[ "$desired_cuda" == "10.2" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.2,<10.3 # [not osx]" - elif [[ "$desired_cuda" == "10.1" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" - elif [[ "$desired_cuda" == "10.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]" - elif [[ "$desired_cuda" == "9.2" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # [not osx]" - elif [[ "$desired_cuda" == "9.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.0,<9.1 # [not osx]" - elif [[ "$desired_cuda" == "8.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=8.0,<8.1 # [not osx]" - else - echo "unhandled desired_cuda: $desired_cuda" - exit 1 - fi -fi - -if [[ -z "$PYTORCH_VERSION" ]]; then - export CONDA_CHANNEL_FLAGS="-c pytorch-nightly -c pytorch" - export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ - python -c "import os, sys, json, re; cuver = '$cuver'; \ - cuver = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - print(re.sub(r'\\+.*$', '', \ - [x['version'] for x in json.load(sys.stdin)['pytorch'] \ - if (x['platform'] == 'darwin' or cuver in x['fn']) \ - and 'py' + os.environ['DESIRED_PYTHON'] in x['fn']][-1]))")" - if [[ -z "$PYTORCH_VERSION" ]]; then - echo "PyTorch version auto detection failed" - echo "No package found for desired_cuda=$desired_cuda and DESIRED_PYTHON=$DESIRED_PYTHON" - exit 1 - fi -else - export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly" -fi -if [[ "$desired_cuda" == 'cpu' ]]; then - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION" -else - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" -fi - -# Loop through all Python versions to 
build a package for each -for py_ver in "${DESIRED_PYTHON[@]}"; do - build_string="py${py_ver}_${build_string_suffix}" - folder_tag="${build_string}_$(date +'%Y%m%d')" - - # Create the conda package into this temporary folder. This is so we can find - # the package afterwards, as there's no easy way to extract the final filename - # from conda-build - output_folder="out_$folder_tag" - rm -rf "$output_folder" - mkdir "$output_folder" - - if [[ "$py_ver" == 3.5 ]]; then - export CONDA_TYPING_CONSTRAINT="- typing" - else - export CONDA_TYPING_CONSTRAINT="" - fi - - export VSTOOLCHAIN_PACKAGE=vs2017 - - # We need to build the compiler activation scripts first on Windows - time VSDEVCMD_ARGS=${VSDEVCMD_ARGS[@]} \ - conda build -c "$ANACONDA_USER" \ - --no-anaconda-upload \ - --output-folder "$output_folder" \ - ../$VSTOOLCHAIN_PACKAGE - - cp ../$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml ../torchcsprng/conda_build_config.yaml - - conda config --set anaconda_upload no - echo "Calling conda-build at $(date)" - if [[ "$desired_cuda" == "9.2" ]]; then - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ - BUILD_VERSION="$TORCHCSPRNG_BUILD_VERSION" \ - CU_VERSION="$cuver" \ - SOURCE_ROOT_DIR="$csprng_rootdir" \ - conda build -c "$ANACONDA_USER" \ - -c defaults \ - -c conda-forge \ - -c "numba/label/dev" \ - --no-anaconda-upload \ - --python "$py_ver" \ - --output-folder "$output_folder" \ - --no-verify \ - --no-test \ - ../torchcsprng - else - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ - BUILD_VERSION="$TORCHCSPRNG_BUILD_VERSION" \ - CU_VERSION="$cuver" \ - SOURCE_ROOT_DIR="$csprng_rootdir" \ - conda build -c "$ANACONDA_USER" \ - -c defaults \ - -c conda-forge \ - --no-anaconda-upload \ - --python "$py_ver" \ - --output-folder "$output_folder" \ - --no-verify \ - --no-test \ - ../torchcsprng - fi - echo "Finished conda-build at $(date)" - - # Extract the package for testing - ls -lah "$output_folder" - built_package="$(find $output_folder/ -name '*torchcsprng*.tar.bz2')" - - # Copy the built 
package to the host machine for persistence before testing - if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then - mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true - cp "$built_package" "$PYTORCH_FINAL_PACKAGE_DIR/" - fi -done - - -set +e diff --git a/packaging/conda/install_conda.bat b/packaging/conda/install_conda.bat deleted file mode 100644 index 6052ad0..0000000 --- a/packaging/conda/install_conda.bat +++ /dev/null @@ -1 +0,0 @@ -start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% diff --git a/packaging/conda/switch_cuda_version.sh b/packaging/conda/switch_cuda_version.sh deleted file mode 100755 index 342def9..0000000 --- a/packaging/conda/switch_cuda_version.sh +++ /dev/null @@ -1,28 +0,0 @@ -if [[ "$OSTYPE" == "msys" ]]; then - CUDA_DIR="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v$1" -else - CUDA_DIR="/usr/local/cuda-$1" -fi - -if ! ls "$CUDA_DIR" -then - echo "folder $CUDA_DIR not found to switch" -fi - -echo "Switching symlink to $CUDA_DIR" -mkdir -p /usr/local -rm -fr /usr/local/cuda -ln -s "$CUDA_DIR" /usr/local/cuda - -if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_VERSION=`ls /usr/local/cuda/bin/cudart64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` - export CUDNN_VERSION=`ls /usr/local/cuda/bin/cudnn64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` -else - export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) - export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." 
-f -3 | rev) -fi - -ls -alh /usr/local/cuda - -echo "CUDA_VERSION=$CUDA_VERSION" -echo "CUDNN_VERSION=$CUDNN_VERSION" diff --git a/packaging/pkg_helpers.bash b/packaging/pkg_helpers.bash deleted file mode 100644 index dad9622..0000000 --- a/packaging/pkg_helpers.bash +++ /dev/null @@ -1,382 +0,0 @@ -# A set of useful bash functions for common functionality we need to do in -# many build scripts - - -# Setup CUDA environment variables, based on CU_VERSION -# -# Inputs: -# CU_VERSION (cpu, cu92, cu100) -# NO_CUDA_PACKAGE (bool) -# BUILD_TYPE (conda, wheel) -# -# Outputs: -# VERSION_SUFFIX (e.g., "") -# PYTORCH_VERSION_SUFFIX (e.g., +cpu) -# WHEEL_DIR (e.g., cu100/) -# CUDA_HOME (e.g., /usr/local/cuda-9.2, respected by torch.utils.cpp_extension) -# FORCE_CUDA (respected by torchcsprng setup.py) -# NVCC_FLAGS (respected by torchcsprng setup.py) -# -# Precondition: CUDA versions are installed in their conventional locations in -# /usr/local/cuda-* -# -# NOTE: Why VERSION_SUFFIX versus PYTORCH_VERSION_SUFFIX? If you're building -# a package with CUDA on a platform we support CUDA on, VERSION_SUFFIX == -# PYTORCH_VERSION_SUFFIX and everyone is happy. However, if you are building a -# package with only CPU bits (e.g., torchaudio), then VERSION_SUFFIX is always -# empty, but PYTORCH_VERSION_SUFFIX is +cpu (because that's how you get a CPU -# version of a Python package. But that doesn't apply if you're on OS X, -# since the default CU_VERSION on OS X is cpu. -setup_cuda() { - - # First, compute version suffixes. 
By default, assume no version suffixes - export VERSION_SUFFIX="" - export PYTORCH_VERSION_SUFFIX="" - export WHEEL_DIR="" - # Wheel builds need suffixes (but not if they're on OS X, which never has suffix) - if [[ "$BUILD_TYPE" == "wheel" ]] && [[ "$(uname)" != Darwin ]]; then - # The default CUDA has no suffix - if [[ "$CU_VERSION" != "cu102" ]]; then - export PYTORCH_VERSION_SUFFIX="+$CU_VERSION" - fi - # Match the suffix scheme of pytorch, unless this package does not have - # CUDA builds (in which case, use default) - if [[ -z "$NO_CUDA_PACKAGE" ]]; then - export VERSION_SUFFIX="$PYTORCH_VERSION_SUFFIX" - export WHEEL_DIR="$CU_VERSION/" - fi - fi - - # Now work out the CUDA settings - case "$CU_VERSION" in - cu112) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2" - else - export CUDA_HOME=/usr/local/cuda-11.2/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu111) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1" - else - export CUDA_HOME=/usr/local/cuda-11.1/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - 
cu110) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.0" - else - export CUDA_HOME=/usr/local/cuda-11.0/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_50,code=compute_50" - ;; - cu102) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" - else - export CUDA_HOME=/usr/local/cuda-10.2/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu101) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.1" - else - export CUDA_HOME=/usr/local/cuda-10.1/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu100) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.0" - else - export CUDA_HOME=/usr/local/cuda-10.0/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # 
https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu92) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.2" - else - export CUDA_HOME=/usr/local/cuda-9.2/ - fi - export FORCE_CUDA=1 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50" - ;; - cpu) - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac -} - -# Populate build version if necessary, and add version suffix -# -# Inputs: -# BUILD_VERSION (e.g., 0.2.0 or empty) -# VERSION_SUFFIX (e.g., +cpu) -# -# Outputs: -# BUILD_VERSION (e.g., 0.2.0.dev20190807+cpu) -# -# Fill BUILD_VERSION if it doesn't exist already with a nightly string -# Usage: setup_build_version 0.2.0 -setup_build_version() { - if [[ -z "$BUILD_VERSION" ]]; then - export BUILD_VERSION="$1.dev$(date "+%Y%m%d")$VERSION_SUFFIX" - else - export BUILD_VERSION="$BUILD_VERSION$VERSION_SUFFIX" - fi - - # Set build version based on tag if on tag - if [[ -n "${CIRCLE_TAG}" ]]; then - # Strip tag - export BUILD_VERSION="$(echo "${CIRCLE_TAG}" | sed -e 's/^v//' -e 's/-.*$//')${VERSION_SUFFIX}" - fi -} - -# Set some useful variables for OS X, if applicable -setup_macos() { - if [[ "$(uname)" == Darwin ]]; then - export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ - fi -} - -# set variable to determine whether the typing library needs to be built in -setup_typing() { - if [[ "$PYTHON_VERSION" == 3.5 ]]; then - export CONDA_TYPING_CONSTRAINT="- typing" - else - export CONDA_TYPING_CONSTRAINT="" - fi -} - -# Top-level entry point for things every package will need to do -# 
-# Usage: setup_env 0.2.0 -setup_env() { - setup_cuda - setup_build_version "$1" - setup_macos - setup_typing -} - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - -# Inputs: -# PYTHON_VERSION (2.7, 3.5, 3.6, 3.7) -# UNICODE_ABI (bool) -# -# Outputs: -# PATH modified to put correct Python version in PATH -# -# Precondition: If Linux, you are in a soumith/manylinux-cuda* Docker image -setup_wheel_python() { - if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then - eval "$(conda shell.bash hook)" - conda env remove -n "env$PYTHON_VERSION" || true - conda create -yn "env$PYTHON_VERSION" python="$PYTHON_VERSION" - conda activate "env$PYTHON_VERSION" - else - case "$PYTHON_VERSION" in - 2.7) - if [[ -n "$UNICODE_ABI" ]]; then - python_abi=cp27-cp27mu - else - python_abi=cp27-cp27m - fi - ;; - 3.5) python_abi=cp35-cp35m ;; - 3.6) python_abi=cp36-cp36m ;; - 3.7) python_abi=cp37-cp37m ;; - 3.8) python_abi=cp38-cp38 ;; - 3.9) python_abi=cp39-cp39 ;; - *) - echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION" - exit 1 - ;; - esac - # Download all the dependencies required to compile image and video_reader - # extensions - - mkdir -p ext_libraries - pushd ext_libraries - popd - export PATH="/opt/python/$python_abi/bin:$(pwd)/ext_libraries/bin:$PATH" - fi -} - -# Install with pip a bit more robustly than the default -pip_install() { - retry pip install --progress-bar off "$@" -} - -# Install torch with pip, respecting PYTORCH_VERSION, and record the installed -# version into PYTORCH_VERSION, if applicable -setup_pip_pytorch_version() { - if [[ -z "$PYTORCH_VERSION" ]]; then - # Install latest prerelease version of torch, per our nightlies, consistent - # with the requested cuda version - pip_install --pre torch -f "https://download.pytorch.org/whl/nightly/${WHEEL_DIR}torch_nightly.html" - if [[ "$CUDA_VERSION" == "cpu" ]]; then - # CUDA and CPU are 
ABI compatible on the CPU-only parts, so strip - # in this case - export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" - else - export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version: *//')" - fi - else - pip_install "torch==$PYTORCH_VERSION$PYTORCH_VERSION_SUFFIX" \ - -f "https://download.pytorch.org/whl/${CU_VERSION}/torch_stable.html" \ - -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/${CU_VERSION}/torch_${UPLOAD_CHANNEL}.html" - fi -} - -# Fill PYTORCH_VERSION with the latest conda nightly version, and -# CONDA_CHANNEL_FLAGS with appropriate flags to retrieve these versions -# -# You MUST have populated PYTORCH_VERSION_SUFFIX before hand. -setup_conda_pytorch_constraint() { - if [[ -z "$PYTORCH_VERSION" ]]; then - export CONDA_CHANNEL_FLAGS="-c pytorch-nightly -c pytorch" - export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ - python -c "import os, sys, json, re; cuver = os.environ.get('CU_VERSION'); \ - cuver_1 = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - cuver_2 = (cuver[:-1] + '.' 
+ cuver[-1]).replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - print(re.sub(r'\\+.*$', '', \ - [x['version'] for x in json.load(sys.stdin)['pytorch'] \ - if (x['platform'] == 'darwin' or cuver_1 in x['fn'] or cuver_2 in x['fn']) \ - and 'py' + os.environ['PYTHON_VERSION'] in x['fn']][-1]))")" - if [[ -z "$PYTORCH_VERSION" ]]; then - echo "PyTorch version auto detection failed" - echo "No package found for CU_VERSION=$CU_VERSION and PYTHON_VERSION=$PYTHON_VERSION" - exit 1 - fi - else - export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-${UPLOAD_CHANNEL}" - fi - if [[ "$CU_VERSION" == cpu ]]; then - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION${PYTORCH_VERSION_SUFFIX}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION" - else - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}" - fi - if [[ "$OSTYPE" == msys && "$CU_VERSION" == cu92 ]]; then - export CONDA_CHANNEL_FLAGS="${CONDA_CHANNEL_FLAGS} -c defaults -c numba/label/dev" - fi -} - -# Translate CUDA_VERSION into CUDA_CUDATOOLKIT_CONSTRAINT -setup_conda_cudatoolkit_constraint() { - export CONDA_CPUONLY_FEATURE="" - if [[ "$(uname)" == Darwin ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="" - else - case "$CU_VERSION" in - cu112) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.2,<11.3 # [not osx]" - ;; - cu111) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.1,<11.2 # [not osx]" - ;; - cu110) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.0,<11.1 # [not osx]" - ;; - cu102) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.2,<10.3 # [not osx]" - ;; - cu101) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" - ;; - cu100) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]" - ;; - cu92) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # 
[not osx]" - ;; - cpu) - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CONDA_CPUONLY_FEATURE="- cpuonly" - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac - fi -} - -setup_conda_cudatoolkit_plain_constraint() { - export CONDA_CPUONLY_FEATURE="" - export CMAKE_USE_CUDA=1 - if [[ "$(uname)" == Darwin ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CMAKE_USE_CUDA=0 - else - case "$CU_VERSION" in - cu102) - export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.2" - ;; - cu101) - export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.1" - ;; - cu100) - export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.0" - ;; - cu92) - export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=9.2" - ;; - cpu) - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CONDA_CPUONLY_FEATURE="cpuonly" - export CMAKE_USE_CUDA=0 - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac - fi -} - -# Build the proper compiler package before building the final package -setup_visual_studio_constraint() { - if [[ "$OSTYPE" == "msys" ]]; then - export VSTOOLCHAIN_PACKAGE=vs$VC_YEAR - conda build $CONDA_CHANNEL_FLAGS --no-anaconda-upload packaging/$VSTOOLCHAIN_PACKAGE - cp packaging/$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml packaging/torchcsprng/conda_build_config.yaml - fi -} - -setup_junit_results_folder() { - if [[ "$CI" == "true" ]]; then - export CONDA_PYTORCH_BUILD_RESULTS_DIRECTORY="${SOURCE_ROOT_DIR}/build_results/results.xml" - fi -} diff --git a/packaging/torchcsprng/bld.bat b/packaging/torchcsprng/bld.bat deleted file mode 100644 index 8c7c833..0000000 --- a/packaging/torchcsprng/bld.bat +++ /dev/null @@ -1,27 +0,0 @@ -@echo on - -set TORCHCSPRNG_BUILD_VERSION=%PKG_VERSION% -set TORCHCSPRNG_BUILD_NUMBER=%PKG_BUILDNUM% - -set build_with_cuda= - -if "%CUDA_VERSION%" == "None" goto cuda_flags_end -if "%CUDA_VERSION%" == "cpu" goto cuda_flags_end -if "%CUDA_VERSION%" == "" goto cuda_flags_end - -set build_with_cuda=1 -set 
desired_cuda=%CUDA_VERSION:~0,-1%.%CUDA_VERSION:~-1,1% - -set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda% -set CUDA_BIN_PATH=%CUDA_PATH%\bin -set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -if "%desired_cuda%" == "9.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "9.2" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "10.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "10.1" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "10.2" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - -:cuda_flags_end - -python setup.py install --single-version-externally-managed --record=record.txt -if errorlevel 1 exit /b 1 diff --git a/packaging/torchcsprng/conda_build_config.yaml b/packaging/torchcsprng/conda_build_config.yaml deleted file mode 100644 index 257515c..0000000 --- a/packaging/torchcsprng/conda_build_config.yaml +++ /dev/null @@ -1,26 +0,0 @@ -channel_sources: - - 
pytorch-nightly,pytorch,defaults -blas_impl: - - mkl # [x86_64] -c_compiler: - - vs2017 # [win] -cxx_compiler: - - vs2017 # [win] -python: - - 3.5 - - 3.6 -# This differs from target_platform in that it determines what subdir the compiler -# will target, not what subdir the compiler package will be itself. -# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 -# code on win-64 miniconda. -cross_compiler_target_platform: - - win-64 # [win] -target_platform: - - win-64 # [win] -vc: - - 14 -zip_keys: - - # [win] - - vc # [win] - - c_compiler # [win] - - cxx_compiler # [win] diff --git a/packaging/torchcsprng/meta.yaml b/packaging/torchcsprng/meta.yaml deleted file mode 100644 index 1b4570d..0000000 --- a/packaging/torchcsprng/meta.yaml +++ /dev/null @@ -1,56 +0,0 @@ -package: - name: torchcsprng - version: "{{ environ.get('BUILD_VERSION') }}" - -source: - path: "{{ environ.get('SOURCE_ROOT_DIR') }}" - -requirements: - build: - - {{ compiler('c') }} # [win] - - {{ compiler('cxx') }} - - host: - - python - - setuptools - {{ environ.get('CONDA_PYTORCH_BUILD_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} - {{ environ.get('CONDA_CPUONLY_FEATURE') }} - - run: - - python - - pillow >=4.1.1 - - numpy >=1.11 - {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} - -build: - string: py{{py}}_{{ environ['CU_VERSION'] }} - script: python setup.py install --single-version-externally-managed --record=record.txt # [not win] - script_env: - - CU_VERSION - - CUDA_HOME - - FORCE_CUDA - - NVCC_FLAGS - - BUILD_VERSION - features: - {{ environ.get('CONDA_CPUONLY_FEATURE') }} - -#test: -# imports: -# - torch -# - torchcsprng -# source_files: -# - test -# requires: -# - pytest -# - scipy -# - pycrypto -# commands: -# pytest . 
--verbose - -about: - home: https://github.com/pytorch/csprng - license: BSD - license_file: LICENSE - summary: 'Cryptographically secure pseudorandom number generators for PyTorch' diff --git a/packaging/vs2017/activate.bat b/packaging/vs2017/activate.bat deleted file mode 100644 index ccecfc2..0000000 --- a/packaging/vs2017/activate.bat +++ /dev/null @@ -1,44 +0,0 @@ -:: Set env vars that tell distutils to use the compiler that we put on path -SET DISTUTILS_USE_SDK=1 -SET MSSdk=1 - -SET "VS_VERSION=15.0" -SET "VS_MAJOR=15" -SET "VS_YEAR=2017" - -set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out" -set "MSYS2_ENV_CONV_EXCL=CL" - -:: For Python 3.5+, ensure that we link with the dynamic runtime. See -:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info -set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VSINSTALLDIR=%%i\" - goto :vswhere - ) -) - -:vswhere - -:: Shorten PATH to avoid the `input line too long` error. -SET MyPath=%PATH% - -setlocal EnableDelayedExpansion - -SET TempPath="%MyPath:;=";"%" -SET var= -FOR %%a IN (%TempPath%) DO ( - IF EXIST %%~sa ( - SET "var=!var!;%%~sa" - ) -) - -set "TempPath=!var:~1!" -endlocal & set "PATH=%TempPath%" - -:: Shorten current directory too -FOR %%A IN (.) 
DO CD "%%~sA" - -:: other things added by install_activate.bat at package build time diff --git a/packaging/vs2017/conda_build_config.yaml b/packaging/vs2017/conda_build_config.yaml deleted file mode 100644 index 5188bb0..0000000 --- a/packaging/vs2017/conda_build_config.yaml +++ /dev/null @@ -1,24 +0,0 @@ -blas_impl: - - mkl # [x86_64] -c_compiler: - - vs2017 # [win] -cxx_compiler: - - vs2017 # [win] -python: - - 3.5 - - 3.6 -# This differs from target_platform in that it determines what subdir the compiler -# will target, not what subdir the compiler package will be itself. -# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 -# code on win-64 miniconda. -cross_compiler_target_platform: - - win-64 # [win] -target_platform: - - win-64 # [win] -vc: - - 14 -zip_keys: - - # [win] - - vc # [win] - - c_compiler # [win] - - cxx_compiler # [win] diff --git a/packaging/vs2017/install_activate.bat b/packaging/vs2017/install_activate.bat deleted file mode 100644 index de0e6ff..0000000 --- a/packaging/vs2017/install_activate.bat +++ /dev/null @@ -1,30 +0,0 @@ -set YEAR=2017 -set VER=15 - -mkdir "%PREFIX%\etc\conda\activate.d" -COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - -IF "%cross_compiler_target_platform%" == "win-64" ( - set "target_platform=amd64" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - IF "%VSDEVCMD_ARGS%" == "" ( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) ELSE 
( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) else ( - set "target_platform=x86" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd - ) - diff --git a/packaging/vs2017/install_runtime.bat b/packaging/vs2017/install_runtime.bat deleted file mode 100644 index 5163c16..0000000 --- a/packaging/vs2017/install_runtime.bat +++ /dev/null @@ -1,49 +0,0 @@ -set VC_PATH=x86 -if "%ARCH%"=="64" ( - set VC_PATH=x64 -) - -set MSC_VER=2017 - -rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015 -rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO ( -rem set SP=%%A -rem ) - -rem if not "%SP%" == "%PKG_VERSION%" ( -rem echo "Version detected from registry: %SP%" -rem echo "does not match version of package being built (%PKG_VERSION%)" -rem echo "Do you have current updates for VS 2015 installed?" -rem exit 1 -rem ) - - -REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below! 
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E -robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E -if %ERRORLEVEL% GEQ 8 exit 1 - -REM ========== This one comes from visual studio 2017 -set "VC_VER=141" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto :eof - ) -) - -@setlocal -call "%VS15VARSALL%" x64 - -set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%" - -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -@endlocal diff --git a/packaging/vs2017/meta.yaml b/packaging/vs2017/meta.yaml deleted file mode 100644 index 1f56952..0000000 --- a/packaging/vs2017/meta.yaml +++ /dev/null @@ -1,24 +0,0 @@ -{% set vcver="14.1" %} -{% set vcfeature="14" %} -{% set vsyear="2017" %} -{% set fullver="15.4.27004.2010" %} - -package: - name: vs{{ vsyear }} - version: {{ fullver }} - -build: - skip: True [not win] - script_env: - - VSDEVCMD_ARGS # [win] - -outputs: - - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }} - script: install_activate.bat - track_features: - # VS 2017 is binary-compatible with VS 2015/vc14. Tools are "v141". 
- strong: - - vc{{ vcfeature }} - about: - summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler - license: BSD 3-clause diff --git a/packaging/vs2019/activate.bat b/packaging/vs2019/activate.bat deleted file mode 100644 index 6f607ba..0000000 --- a/packaging/vs2019/activate.bat +++ /dev/null @@ -1,44 +0,0 @@ -:: Set env vars that tell distutils to use the compiler that we put on path -SET DISTUTILS_USE_SDK=1 -SET MSSdk=1 - -SET "VS_VERSION=16.0" -SET "VS_MAJOR=16" -SET "VS_YEAR=2019" - -set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out" -set "MSYS2_ENV_CONV_EXCL=CL" - -:: For Python 3.5+, ensure that we link with the dynamic runtime. See -:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info -set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VSINSTALLDIR=%%i\" - goto :vswhere - ) -) - -:vswhere - -:: Shorten PATH to avoid the `input line too long` error. -SET MyPath=%PATH% - -setlocal EnableDelayedExpansion - -SET TempPath="%MyPath:;=";"%" -SET var= -FOR %%a IN (%TempPath%) DO ( - IF EXIST %%~sa ( - SET "var=!var!;%%~sa" - ) -) - -set "TempPath=!var:~1!" -endlocal & set "PATH=%TempPath%" - -:: Shorten current directory too -FOR %%A IN (.) 
DO CD "%%~sA" - -:: other things added by install_activate.bat at package build time diff --git a/packaging/vs2019/conda_build_config.yaml b/packaging/vs2019/conda_build_config.yaml deleted file mode 100644 index 358052e..0000000 --- a/packaging/vs2019/conda_build_config.yaml +++ /dev/null @@ -1,24 +0,0 @@ -blas_impl: - - mkl # [x86_64] -c_compiler: - - vs2019 # [win] -cxx_compiler: - - vs2019 # [win] -python: - - 3.5 - - 3.6 -# This differs from target_platform in that it determines what subdir the compiler -# will target, not what subdir the compiler package will be itself. -# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 -# code on win-64 miniconda. -cross_compiler_target_platform: - - win-64 # [win] -target_platform: - - win-64 # [win] -vc: - - 14 -zip_keys: - - # [win] - - vc # [win] - - c_compiler # [win] - - cxx_compiler # [win] diff --git a/packaging/vs2019/install_activate.bat b/packaging/vs2019/install_activate.bat deleted file mode 100644 index 3c38253..0000000 --- a/packaging/vs2019/install_activate.bat +++ /dev/null @@ -1,30 +0,0 @@ -set YEAR=2019 -set VER=16 - -mkdir "%PREFIX%\etc\conda\activate.d" -COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - -IF "%cross_compiler_target_platform%" == "win-64" ( - set "target_platform=amd64" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - IF "%VSDEVCMD_ARGS%" == "" ( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) ELSE 
( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) else ( - set "target_platform=x86" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd - ) - diff --git a/packaging/vs2019/install_runtime.bat b/packaging/vs2019/install_runtime.bat deleted file mode 100644 index e09a5cc..0000000 --- a/packaging/vs2019/install_runtime.bat +++ /dev/null @@ -1,49 +0,0 @@ -set VC_PATH=x86 -if "%ARCH%"=="64" ( - set VC_PATH=x64 -) - -set MSC_VER=2019 - -rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015 -rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO ( -rem set SP=%%A -rem ) - -rem if not "%SP%" == "%PKG_VERSION%" ( -rem echo "Version detected from registry: %SP%" -rem echo "does not match version of package being built (%PKG_VERSION%)" -rem echo "Do you have current updates for VS 2015 installed?" -rem exit 1 -rem ) - - -REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below! 
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E -robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E -if %ERRORLEVEL% GEQ 8 exit 1 - -REM ========== This one comes from visual studio 2019 -set "VC_VER=142" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto :eof - ) -) - -@setlocal -call "%VS15VARSALL%" x64 - -set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%" - -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -@endlocal diff --git a/packaging/vs2019/meta.yaml b/packaging/vs2019/meta.yaml deleted file mode 100644 index 94a0ed4..0000000 --- a/packaging/vs2019/meta.yaml +++ /dev/null @@ -1,24 +0,0 @@ -{% set vcver="14.2" %} -{% set vcfeature="14" %} -{% set vsyear="2019" %} -{% set fullver="15.4.27004.2010" %} - -package: - name: vs{{ vsyear }} - version: {{ fullver }} - -build: - skip: True [not win] - script_env: - - VSDEVCMD_ARGS # [win] - -outputs: - - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }} - script: install_activate.bat - track_features: - # VS 2019 is binary-compatible with VS 2017/vc 14.1 and 2015/vc14. Tools are "v142". 
- strong: - - vc{{ vcfeature }} - about: - summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler - license: BSD 3-clause diff --git a/packaging/wheel/linux_manywheel.sh b/packaging/wheel/linux_manywheel.sh deleted file mode 100644 index d6471aa..0000000 --- a/packaging/wheel/linux_manywheel.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash -set -ex - -if [ "$#" -ne 1 ]; then - echo "Illegal number of parameters. Pass cuda version" - echo "CUDA version should be cu92, cu100 or cpu" - exit 1 -fi -export CUVER="$1" # cu[0-9]* cpu - -if [[ "$CUVER" == "cu102" ]]; then - cu_suffix="" -else - cu_suffix="+$CUVER" -fi - -export TORCHCSPRNG_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")${cu_suffix}" -export TORCHCSPRNG_BUILD_NUMBER="1" -export TORCHCSPRNG_LOCAL_VERSION_LABEL="$CUVER" -export OUT_DIR="/remote/$CUVER" - -pushd /opt/python -DESIRED_PYTHON=(*/) -popd -for desired_py in "${DESIRED_PYTHON[@]}"; do - python_installations+=("/opt/python/$desired_py") -done - -OLD_PATH=$PATH -cd /tmp -rm -rf csprng -git clone https://github.com/pytorch/csprng - -cd /tmp/csprng - -for PYDIR in "${python_installations[@]}"; do - export PATH=$PYDIR/bin:$OLD_PATH - pip install --upgrade pip - pip install numpy pyyaml future - - pip uninstall -y torch || true - pip uninstall -y torch_nightly || true - - export TORCHCSPRNG_PYTORCH_DEPENDENCY_NAME=torch_nightly - pip install torch_nightly -f https://download.pytorch.org/whl/nightly/$CUVER/torch_nightly.html - # CPU/CUDA variants of PyTorch have ABI compatible PyTorch for - # the CPU only bits. Therefore, we - # strip off the local package qualifier, but ONLY if we're - # doing a CPU build. 
- if [[ "$CUVER" == "cpu" ]]; then - export TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//' | sed 's/+.\+//')" - else - export TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//')" - fi - echo "Building against ${TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION}" - - pip install ninja - python setup.py clean - python setup.py bdist_wheel - mkdir -p $OUT_DIR - cp dist/*.whl $OUT_DIR/ -done diff --git a/packaging/wheel/osx_wheel.sh b/packaging/wheel/osx_wheel.sh deleted file mode 100644 index 566f956..0000000 --- a/packaging/wheel/osx_wheel.sh +++ /dev/null @@ -1,52 +0,0 @@ -if [[ ":$PATH:" == *"conda"* ]]; then - echo "existing anaconda install in PATH, remove it and run script" - exit 1 -fi -# download and activate anaconda -rm -rf ~/minconda_wheel_env_tmp -wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh && \ - chmod +x Miniconda3-latest-MacOSX-x86_64.sh && \ - ./Miniconda3-latest-MacOSX-x86_64.sh -b -p ~/minconda_wheel_env_tmp && \ - rm Miniconda3-latest-MacOSX-x86_64.sh - -. 
~/minconda_wheel_env_tmp/bin/activate - - -export TORCHCSPRNG_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")" -export TORCHCSPRNG_BUILD_NUMBER="1" -export OUT_DIR=~/torchcsprng_wheels - -export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ - -pushd /tmp -rm -rf csprng -git clone https://github.com/pytorch/csprng -pushd csprng - -desired_pythons=( "2.7" "3.5" "3.6" "3.7" ) -# for each python -for desired_python in "${desired_pythons[@]}" -do - # create and activate python env - env_name="env$desired_python" - conda create -yn $env_name python="$desired_python" - conda activate $env_name - - pip uninstall -y torch || true - pip uninstall -y torch_nightly || true - - export TORCHCSPRNG_PYTORCH_DEPENDENCY_NAME=torch_nightly - pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html - export TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: *//')" - echo "Building against ${TORCHAUDIO_PYTORCH_DEPENDENCY_VERSION}" - - # install torchcsprng dependencies - pip install ninja scipy pytest pycrypto - - python setup.py clean - python setup.py bdist_wheel - mkdir -p $OUT_DIR - cp dist/*.whl $OUT_DIR/ -done -popd -popd diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py deleted file mode 100644 index fd92b68..0000000 --- a/packaging/wheel/relocate.py +++ /dev/null @@ -1,408 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Helper script to package wheels and relocate binaries.""" - -import glob -import hashlib -import io - -# Standard library imports -import os -import os.path as osp -import platform -import shutil -import subprocess -import sys -import zipfile -from base64 import urlsafe_b64encode - -# Third party imports -if sys.platform == "linux": - from auditwheel.lddtree import lddtree -from wheel.bdist_wheel import get_abi_tag - - -ALLOWLIST = { - "libgcc_s.so.1", - "libstdc++.so.6", - "libm.so.6", - "libdl.so.2", - "librt.so.1", - "libc.so.6", - "libnsl.so.1", - "libutil.so.1", 
- "libpthread.so.0", - "libresolv.so.2", - "libX11.so.6", - "libXext.so.6", - "libXrender.so.1", - "libICE.so.6", - "libSM.so.6", - "libGL.so.1", - "libgobject-2.0.so.0", - "libgthread-2.0.so.0", - "libglib-2.0.so.0", - "ld-linux-x86-64.so.2", - "ld-2.17.so", -} - -WINDOWS_ALLOWLIST = { - "MSVCP140.dll", - "KERNEL32.dll", - "VCRUNTIME140_1.dll", - "VCRUNTIME140.dll", - "api-ms-win-crt-heap-l1-1-0.dll", - "api-ms-win-crt-runtime-l1-1-0.dll", - "api-ms-win-crt-stdio-l1-1-0.dll", - "api-ms-win-crt-filesystem-l1-1-0.dll", - "api-ms-win-crt-string-l1-1-0.dll", - "api-ms-win-crt-environment-l1-1-0.dll", - "api-ms-win-crt-math-l1-1-0.dll", - "api-ms-win-crt-convert-l1-1-0.dll", -} - - -HERE = osp.dirname(osp.abspath(__file__)) -PACKAGE_ROOT = osp.dirname(osp.dirname(HERE)) -PLATFORM_ARCH = platform.machine() -PYTHON_VERSION = sys.version_info - - -def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE): - """Yield pieces of data from a file-like object until EOF.""" - while True: - chunk = file.read(size) - if not chunk: - break - yield chunk - - -def rehash(path, blocksize=1 << 20): - """Return (hash, length) for path using hashlib.sha256()""" - h = hashlib.sha256() - length = 0 - with open(path, "rb") as f: - for block in read_chunks(f, size=blocksize): - length += len(block) - h.update(block) - digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=") - # unicode/str python2 issues - return (digest, str(length)) # type: ignore - - -def unzip_file(file, dest): - """Decompress zip `file` into directory `dest`.""" - with zipfile.ZipFile(file, "r") as zip_ref: - zip_ref.extractall(dest) - - -def is_program_installed(basename): - """ - Return program absolute path if installed in PATH. - Otherwise, return None - On macOS systems, a .app is considered installed if - it exists. 
- """ - if sys.platform == "darwin" and basename.endswith(".app") and osp.exists(basename): - return basename - - for path in os.environ["PATH"].split(os.pathsep): - abspath = osp.join(path, basename) - if osp.isfile(abspath): - return abspath - - -def find_program(basename): - """ - Find program in PATH and return absolute path - Try adding .exe or .bat to basename on Windows platforms - (return None if not found) - """ - names = [basename] - if os.name == "nt": - # Windows platforms - extensions = (".exe", ".bat", ".cmd", ".dll") - if not basename.endswith(extensions): - names = [basename + ext for ext in extensions] + [basename] - for name in names: - path = is_program_installed(name) - if path: - return path - - -def patch_new_path(library_path, new_dir): - library = osp.basename(library_path) - name, *rest = library.split(".") - rest = ".".join(rest) - hash_id = hashlib.sha256(library_path.encode("utf-8")).hexdigest()[:8] - new_name = ".".join([name, hash_id, rest]) - return osp.join(new_dir, new_name) - - -def find_dll_dependencies(dumpbin, binary): - out = subprocess.run([dumpbin, "/dependents", binary], stdout=subprocess.PIPE) - out = out.stdout.strip().decode("utf-8") - start_index = out.find("dependencies:") + len("dependencies:") - end_index = out.find("Summary") - dlls = out[start_index:end_index].strip() - dlls = dlls.split(os.linesep) - dlls = [dll.strip() for dll in dlls] - return dlls - - -def relocate_elf_library(patchelf, output_dir, output_library, binary): - """ - Relocate an ELF shared library to be packaged on a wheel. - - Given a shared library, find the transitive closure of its dependencies, - rename and copy them into the wheel while updating their respective rpaths. 
- """ - - print("Relocating {0}".format(binary)) - binary_path = osp.join(output_library, binary) - - ld_tree = lddtree(binary_path) - tree_libs = ld_tree["libs"] - - binary_queue = [(n, binary) for n in ld_tree["needed"]] - binary_paths = {binary: binary_path} - binary_dependencies = {} - - while binary_queue != []: - library, parent = binary_queue.pop(0) - library_info = tree_libs[library] - print(library) - - if library_info["path"] is None: - print("Omitting {0}".format(library)) - continue - - if library in ALLOWLIST: - # Omit glibc/gcc/system libraries - print("Omitting {0}".format(library)) - continue - - parent_dependencies = binary_dependencies.get(parent, []) - parent_dependencies.append(library) - binary_dependencies[parent] = parent_dependencies - - if library in binary_paths: - continue - - binary_paths[library] = library_info["path"] - binary_queue += [(n, library) for n in library_info["needed"]] - - print("Copying dependencies to wheel directory") - new_libraries_path = osp.join(output_dir, "torchcsprng.libs") - os.makedirs(new_libraries_path) - - new_names = {binary: binary_path} - - for library in binary_paths: - if library != binary: - library_path = binary_paths[library] - new_library_path = patch_new_path(library_path, new_libraries_path) - print("{0} -> {1}".format(library, new_library_path)) - shutil.copyfile(library_path, new_library_path) - new_names[library] = new_library_path - - print("Updating dependency names by new files") - for library in binary_paths: - if library != binary: - if library not in binary_dependencies: - continue - library_dependencies = binary_dependencies[library] - new_library_name = new_names[library] - for dep in library_dependencies: - new_dep = osp.basename(new_names[dep]) - print("{0}: {1} -> {2}".format(library, dep, new_dep)) - subprocess.check_output( - [patchelf, "--replace-needed", dep, new_dep, new_library_name], - cwd=new_libraries_path, - ) - - print("Updating library rpath") - subprocess.check_output( - 
[patchelf, "--set-rpath", "$ORIGIN", new_library_name], - cwd=new_libraries_path, - ) - - subprocess.check_output( - [patchelf, "--print-rpath", new_library_name], cwd=new_libraries_path - ) - - print("Update library dependencies") - library_dependencies = binary_dependencies[binary] - for dep in library_dependencies: - new_dep = osp.basename(new_names[dep]) - print("{0}: {1} -> {2}".format(binary, dep, new_dep)) - subprocess.check_output( - [patchelf, "--replace-needed", dep, new_dep, binary], cwd=output_library - ) - - print("Update library rpath") - subprocess.check_output( - [patchelf, "--set-rpath", "$ORIGIN:$ORIGIN/../torchcsprng.libs", binary_path], - cwd=output_library, - ) - - -def relocate_dll_library(dumpbin, output_dir, output_library, binary): - """ - Relocate a DLL/PE shared library to be packaged on a wheel. - - Given a shared library, find the transitive closure of its dependencies, - rename and copy them into the wheel. - """ - print("Relocating {0}".format(binary)) - binary_path = osp.join(output_library, binary) - - library_dlls = find_dll_dependencies(dumpbin, binary_path) - binary_queue = [(dll, binary) for dll in library_dlls] - binary_paths = {binary: binary_path} - binary_dependencies = {} - - while binary_queue != []: - library, parent = binary_queue.pop(0) - if library in WINDOWS_ALLOWLIST or library.startswith("api-ms-win"): - print("Omitting {0}".format(library)) - continue - - library_path = find_program(library) - if library_path is None: - print("{0} not found".format(library)) - continue - - if osp.basename(osp.dirname(library_path)) == "system32": - continue - - print("{0}: {1}".format(library, library_path)) - parent_dependencies = binary_dependencies.get(parent, []) - parent_dependencies.append(library) - binary_dependencies[parent] = parent_dependencies - - if library in binary_paths: - continue - - binary_paths[library] = library_path - downstream_dlls = find_dll_dependencies(dumpbin, library_path) - binary_queue += [(n, 
library) for n in downstream_dlls] - - print("Copying dependencies to wheel directory") - package_dir = osp.join(output_dir, "torchcsprng") - for library in binary_paths: - if library != binary: - library_path = binary_paths[library] - new_library_path = osp.join(package_dir, library) - print("{0} -> {1}".format(library, new_library_path)) - shutil.copyfile(library_path, new_library_path) - - -def compress_wheel(output_dir, wheel, wheel_dir, wheel_name): - """Create RECORD file and compress wheel distribution.""" - print("Update RECORD file in wheel") - dist_info = glob.glob(osp.join(output_dir, "*.dist-info"))[0] - record_file = osp.join(dist_info, "RECORD") - - with open(record_file, "w") as f: - for root, _, files in os.walk(output_dir): - for this_file in files: - full_file = osp.join(root, this_file) - rel_file = osp.relpath(full_file, output_dir) - if full_file == record_file: - f.write("{0},,\n".format(rel_file)) - else: - digest, size = rehash(full_file) - f.write("{0},{1},{2}\n".format(rel_file, digest, size)) - - print("Compressing wheel") - base_wheel_name = osp.join(wheel_dir, wheel_name) - shutil.make_archive(base_wheel_name, "zip", output_dir) - os.remove(wheel) - shutil.move("{0}.zip".format(base_wheel_name), wheel) - shutil.rmtree(output_dir) - - -def patch_linux(): - # Get patchelf location - patchelf = find_program("patchelf") - if patchelf is None: - raise FileNotFoundError( - "Patchelf was not found in the system, please" - " make sure that is available on the PATH." 
- ) - - # Find wheel - print("Finding wheels...") - wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl")) - output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process") - - image_binary = "image.so" - video_binary = "video_reader.so" - torchcsprng_binaries = [image_binary, video_binary] - for wheel in wheels: - if osp.exists(output_dir): - shutil.rmtree(output_dir) - - os.makedirs(output_dir) - - print("Unzipping wheel...") - wheel_file = osp.basename(wheel) - wheel_dir = osp.dirname(wheel) - print("{0}".format(wheel_file)) - wheel_name, _ = osp.splitext(wheel_file) - unzip_file(wheel, output_dir) - - print("Finding ELF dependencies...") - output_library = osp.join(output_dir, "torchcsprng") - for binary in torchcsprng_binaries: - if osp.exists(osp.join(output_library, binary)): - relocate_elf_library(patchelf, output_dir, output_library, binary) - - compress_wheel(output_dir, wheel, wheel_dir, wheel_name) - - -def patch_win(): - # Get dumpbin location - dumpbin = find_program("dumpbin") - if dumpbin is None: - raise FileNotFoundError( - "Dumpbin was not found in the system, please" - " make sure that is available on the PATH." 
- ) - - # Find wheel - print("Finding wheels...") - wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl")) - output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process") - - image_binary = "image.pyd" - video_binary = "video_reader.pyd" - torchcsprng_binaries = [image_binary, video_binary] - for wheel in wheels: - if osp.exists(output_dir): - shutil.rmtree(output_dir) - - os.makedirs(output_dir) - - print("Unzipping wheel...") - wheel_file = osp.basename(wheel) - wheel_dir = osp.dirname(wheel) - print("{0}".format(wheel_file)) - wheel_name, _ = osp.splitext(wheel_file) - unzip_file(wheel, output_dir) - - print("Finding DLL/PE dependencies...") - output_library = osp.join(output_dir, "torchcsprng") - for binary in torchcsprng_binaries: - if osp.exists(osp.join(output_library, binary)): - relocate_dll_library(dumpbin, output_dir, output_library, binary) - - compress_wheel(output_dir, wheel, wheel_dir, wheel_name) - - -if __name__ == "__main__": - if sys.platform == "linux": - patch_linux() - elif sys.platform == "win32": - patch_win() diff --git a/packaging/windows/azure-pipelines-ci.yml b/packaging/windows/azure-pipelines-ci.yml deleted file mode 100644 index 6f9f346..0000000 --- a/packaging/windows/azure-pipelines-ci.yml +++ /dev/null @@ -1,11 +0,0 @@ - -# Turn off auto builds for commits -trigger: none -pr: none - -jobs: -- template: templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CPU' - msagent: true diff --git a/packaging/windows/azure-pipelines.yml b/packaging/windows/azure-pipelines.yml deleted file mode 100644 index d024057..0000000 --- a/packaging/windows/azure-pipelines.yml +++ /dev/null @@ -1,35 +0,0 @@ - -# Turn off auto builds for commits -trigger: none -pr: none - -jobs: -- template: templates/auth_task.yml - -- template: templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CPU' - msagent: true - -- template: templates/build_task.yml - parameters: - package: 'Conda' - spec: 'CPU' - msagent: true - -- template: 
templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CUDA' - msagent: true - -- template: templates/build_task.yml - parameters: - package: 'Conda' - spec: 'CUDA' - msagent: true - -- template: templates/linux_build_task.yml - parameters: - msagent: $(ms.hosted.agent.cpu) diff --git a/packaging/windows/build_csprng.bat b/packaging/windows/build_csprng.bat deleted file mode 100644 index e6da23d..0000000 --- a/packaging/windows/build_csprng.bat +++ /dev/null @@ -1,145 +0,0 @@ -@echo off - -:: This script parses args, installs required libraries (miniconda, MKL, -:: Magma), and then delegates to cpu.bat, cuda80.bat, etc. - -IF NOT "%CUDA_VERSION%" == "" IF NOT "%TORCHCSPRNG_BUILD_VERSION%" == "" if NOT "%TORCHCSPRNG_BUILD_NUMBER%" == "" goto env_end -if "%~1"=="" goto arg_error -if "%~2"=="" goto arg_error -if "%~3"=="" goto arg_error -if NOT "%~4"=="" goto arg_error -goto arg_end - -:arg_error - -echo Illegal number of parameters. Pass cuda version, pytorch version, build number -echo CUDA version should be Mm with no dot, e.g. '80' -echo DESIRED_PYTHON should be M.m, e.g. 
'2.7' -exit /b 1 - -:arg_end - -set CUDA_VERSION=%~1 -set TORCHCSPRNG_BUILD_VERSION=%~2 -set TORCHCSPRNG_BUILD_NUMBER=%~3 - -set BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION% - -:env_end - -if NOT "%CUDA_VERSION%" == "cpu" ( - set CUDA_PREFIX=cuda%CUDA_VERSION% - set CUVER=cu%CUDA_VERSION% - set FORCE_CUDA=1 -) else ( - set CUDA_PREFIX=cpu - set CUVER=cpu -) - -set BUILD_CSPRNG=1 -REM set TORCH_WHEEL=torch -f https://download.pytorch.org/whl/%CUVER%/stable.html --no-index - -IF "%DESIRED_PYTHON%" == "" set DESIRED_PYTHON=3.5;3.6;3.7 -set DESIRED_PYTHON_PREFIX=%DESIRED_PYTHON:.=% -set DESIRED_PYTHON_PREFIX=py%DESIRED_PYTHON_PREFIX:;=;py% - -set SRC_DIR=%~dp0 -pushd %SRC_DIR% - -:: Install Miniconda3 -set "CONDA_HOME=%CD%\conda" -set "tmp_conda=%CONDA_HOME%" -set "miniconda_exe=%CD%\miniconda.exe" -rmdir /s /q conda -del miniconda.exe -curl -k https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" -call ..\conda\install_conda.bat -IF ERRORLEVEL 1 exit /b 1 -set "ORIG_PATH=%PATH%" -set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" - -:: Create a new conda environment -setlocal EnableDelayedExpansion -FOR %%v IN (%DESIRED_PYTHON%) DO ( - set PYTHON_VERSION_STR=%%v - set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! - conda remove -n py!PYTHON_VERSION_STR! --all -y || rmdir %CONDA_HOME%\envs\py!PYTHON_VERSION_STR! /s - conda create -n py!PYTHON_VERSION_STR! -y -q -c defaults -c conda-forge numpy>=1.11 mkl>=2018 python=%%v ca-certificates scipy pycrypto -) - -:: Uncomment for stable releases -:: FOR %%v IN (%DESIRED_PYTHON%) DO ( -:: set PYTHON_VERSION_STR=%%v -:: set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! 
-:: set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%" - -:: if "%CUDA_VERSION%" == "100" ( -:: set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl -:: ) else ( -:: set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0%%2B%CUVER%-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl -:: ) -:: echo Installing !TORCH_WHEEL!... -:: pip install "!TORCH_WHEEL!" -:: ) - -:: Uncomment for nightly releases -FOR %%v IN (%DESIRED_PYTHON%) DO ( - set PYTHON_VERSION_STR=%%v - set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! - set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%" - - set TORCH_WHEEL=torch --pre -f https://download.pytorch.org/whl/nightly/%CUVER%/torch_nightly.html - echo Installing !TORCH_WHEEL!... - pip install !TORCH_WHEEL! 
-) - -endlocal - -if "%DEBUG%" == "1" ( - set BUILD_TYPE=debug -) ELSE ( - set BUILD_TYPE=release -) - -:: Install sccache -if "%USE_SCCACHE%" == "1" ( - mkdir %CD%\tmp_bin - curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %CD%\tmp_bin\sccache.exe - if not "%CUDA_VERSION%" == "" ( - copy %CD%\tmp_bin\sccache.exe %CD%\tmp_bin\nvcc.exe - - set CUDA_NVCC_EXECUTABLE=%CD%\tmp_bin\nvcc - set "PATH=%CD%\tmp_bin;%PATH%" - ) -) - -for %%v in (%DESIRED_PYTHON_PREFIX%) do ( - :: Activate Python Environment - set PYTHON_PREFIX=%%v - set "PATH=%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%" - if defined INCLUDE ( - set "INCLUDE=%INCLUDE%;%CONDA_HOME%\envs\%%v\Library\include" - ) else ( - set "INCLUDE=%CONDA_HOME%\envs\%%v\Library\include" - ) - if defined LIB ( - set "LIB=%LIB%;%CONDA_HOME%\envs\%%v\Library\lib" - ) else ( - set "LIB=%CONDA_HOME%\envs\%%v\Library\lib" - ) - @setlocal - :: Set Flags - if NOT "%CUDA_VERSION%"=="cpu" ( - set CUDNN_VERSION=7 - ) - call %CUDA_PREFIX%.bat - IF ERRORLEVEL 1 exit /b 1 - call internal\test.bat - IF ERRORLEVEL 1 exit /b 1 - @endlocal -) - -set "PATH=%ORIG_PATH%" -popd - -IF ERRORLEVEL 1 exit /b 1 diff --git a/packaging/windows/cpu.bat b/packaging/windows/cpu.bat deleted file mode 100644 index 1897fb5..0000000 --- a/packaging/windows/cpu.bat +++ /dev/null @@ -1,37 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. 
- IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -echo Disabling CUDA -set NO_CUDA=1 -set USE_CUDA=0 - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy_cpu.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/cuda101.bat b/packaging/windows/cuda101.bat deleted file mode 100644 index 016baec..0000000 --- a/packaging/windows/cuda101.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V10_1%"=="" ( - echo CUDA 10.1 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_CSPRNG%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V10_1%" - set "PATH=%CUDA_PATH_V10_1%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto 
eof - -:eof diff --git a/packaging/windows/cuda102.bat b/packaging/windows/cuda102.bat deleted file mode 100644 index d5a0bdf..0000000 --- a/packaging/windows/cuda102.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V10_2%"=="" ( - echo CUDA 10.2 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_CSPRNG%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V10_2%" - set "PATH=%CUDA_PATH_V10_2%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/cuda92.bat b/packaging/windows/cuda92.bat deleted file mode 100644 index 7f520da..0000000 --- a/packaging/windows/cuda92.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. 
- IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set USE_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V9_2%"=="" ( - echo CUDA 9.2 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_CSPRNG%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V9_2%" - set "PATH=%CUDA_PATH_V9_2%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/internal/auth.bat b/packaging/windows/internal/auth.bat deleted file mode 100644 index c874bce..0000000 --- a/packaging/windows/internal/auth.bat +++ /dev/null @@ -1,46 +0,0 @@ -@echo off - -: From the following doc, the build won't be triggered if the users don't sign in daily. -: https://docs.microsoft.com/en-us/azure/devops/pipelines/build/triggers?tabs=yaml&view=vsts#my-build-didnt-run-what-happened -: To avoid this problem, we can just go through the sign in process using the following command. 
- -:auth_start - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -for /f "usebackq tokens=*" %%i in (`curl -so NUL -w "%%{http_code}" -u %VSTS_AUTH% https://dev.azure.com/pytorch`) do ( - set STATUS_CODE=%%i -) - -IF NOT "%STATUS_CODE%" == "200" ( - echo Auth retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Auth failed - goto err - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto auth_start -) ELSE ( - echo Login Attempt Succeeded - goto auth_end -) - -:err - -: Throw a warning if it fails -powershell -c "Write-Warning 'Login Attempt Failed'" - -:auth_end - -set RETRY_TIMES= -set SLEEP_TIME= -set STATUS_CODE= - -exit /b 0 diff --git a/packaging/windows/internal/build_conda.bat b/packaging/windows/internal/build_conda.bat deleted file mode 100644 index 6ffd67b..0000000 --- a/packaging/windows/internal/build_conda.bat +++ /dev/null @@ -1,15 +0,0 @@ -if "%VC_YEAR%" == "2017" set VSDEVCMD_ARGS=-vcvars_ver=14.13 -if "%VC_YEAR%" == "2017" powershell packaging/windows/internal/vs2017_install.ps1 -if errorlevel 1 exit /b 1 - -call packaging/windows/internal/cuda_install.bat -if errorlevel 1 exit /b 1 - -call packaging/windows/internal/nightly_defaults.bat Conda -if errorlevel 1 exit /b 1 - -set PYTORCH_FINAL_PACKAGE_DIR=%CD%\packaging\windows\output -if not exist "%PYTORCH_FINAL_PACKAGE_DIR%" mkdir %PYTORCH_FINAL_PACKAGE_DIR% - -bash ./packaging/conda/build_csprng.sh %CUDA_VERSION% %TORCHCSPRNG_BUILD_VERSION% %TORCHCSPRNG_BUILD_NUMBER% -if errorlevel 1 exit /b 1 diff --git a/packaging/windows/internal/build_wheels.bat b/packaging/windows/internal/build_wheels.bat deleted file mode 100644 index 876b8b0..0000000 --- a/packaging/windows/internal/build_wheels.bat +++ /dev/null @@ -1,12 +0,0 @@ -if "%VC_YEAR%" == "2017" set VSDEVCMD_ARGS=-vcvars_ver=14.13 -if "%VC_YEAR%" 
== "2017" powershell packaging/windows/internal/vs2017_install.ps1 -if errorlevel 1 exit /b 1 - -call packaging/windows/internal/cuda_install.bat -if errorlevel 1 exit /b 1 - -call packaging/windows/internal/nightly_defaults.bat Wheels -if errorlevel 1 exit /b 1 - -call packaging/windows/build_csprng.bat %CUDA_VERSION% %TORCHCSPRNG_BUILD_VERSION% %TORCHCSPRNG_BUILD_NUMBER% -if errorlevel 1 exit /b 1 diff --git a/packaging/windows/internal/check_deps.bat b/packaging/windows/internal/check_deps.bat deleted file mode 100644 index 739e568..0000000 --- a/packaging/windows/internal/check_deps.bat +++ /dev/null @@ -1,67 +0,0 @@ -@echo off - -REM Check for necessary components - -IF NOT "%PROCESSOR_ARCHITECTURE%"=="AMD64" ( - echo You should use 64 bits Windows to build and run PyTorch - exit /b 1 -) - -IF "%BUILD_CSPRNG%" == "" ( - where /q cmake.exe - - IF ERRORLEVEL 1 ( - echo CMake is required to compile PyTorch on Windows - exit /b 1 - ) -) - -IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere -IF "%VS15VCVARSALL%"=="" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -set MSSdk=1 -set DISTUTILS_USE_SDK=1 - -where /q python.exe - -IF ERRORLEVEL 1 ( - echo Python x64 3.5 or up is required to compile PyTorch on Windows - exit /b 1 -) - -for /F "usebackq delims=" %%i in (`python -c "import sys; print('{0[0]}{0[1]}'.format(sys.version_info))"`) do ( - set /a PYVER=%%i -) - -if %PYVER% LSS 35 ( - echo Warning: PyTorch for Python 2 
under Windows is experimental. - echo Python x64 3.5 or up is recommended to compile PyTorch on Windows - echo Maybe you can create a virual environment if you have conda installed: - echo ^> conda create -n test python=3.6 pyyaml mkl numpy - echo ^> activate test -) - -for /F "usebackq delims=" %%i in (`python -c "import struct;print( 8 * struct.calcsize('P'))"`) do ( - set /a PYSIZE=%%i -) - -if %PYSIZE% NEQ 64 ( - echo Python x64 3.5 or up is required to compile PyTorch on Windows - exit /b 1 -) diff --git a/packaging/windows/internal/check_opts.bat b/packaging/windows/internal/check_opts.bat deleted file mode 100644 index 003ad92..0000000 --- a/packaging/windows/internal/check_opts.bat +++ /dev/null @@ -1,33 +0,0 @@ -@echo off - -REM Check for optional components - -where /q ninja.exe - -IF NOT ERRORLEVEL 1 ( - echo Ninja found, using it to speed up builds - set CMAKE_GENERATOR=Ninja -) - -where /q clcache.exe - -IF NOT ERRORLEVEL 1 ( - echo clcache found, using it to speed up builds - set CC=clcache - set CXX=clcache -) - -where /q sccache.exe - -IF NOT ERRORLEVEL 1 ( - echo sccache found, using it to speed up builds - set CC=sccache cl - set CXX=sccache cl -) - -IF exist "%MKLProductDir%\mkl\lib\intel64_win" ( - echo MKL found, adding it to build - set "LIB=%MKLProductDir%\mkl\lib\intel64_win;%MKLProductDir%\compiler\lib\intel64_win;%LIB%"; -) - -exit /b 0 diff --git a/packaging/windows/internal/clean.bat b/packaging/windows/internal/clean.bat deleted file mode 100644 index 7489640..0000000 --- a/packaging/windows/internal/clean.bat +++ /dev/null @@ -1,5 +0,0 @@ -@echo off - -cd %MODULE_NAME% -python setup.py clean -cd .. diff --git a/packaging/windows/internal/clone.bat b/packaging/windows/internal/clone.bat deleted file mode 100644 index 758527c..0000000 --- a/packaging/windows/internal/clone.bat +++ /dev/null @@ -1,56 +0,0 @@ -@echo off - -:: The conda and wheels jobs are seperated on Windows, so we don't need to clone again. 
-IF "%BUILD_CSPRNG%" == "" ( - if exist "%NIGHTLIES_PYTORCH_ROOT%" ( - xcopy /E /Y /Q "%NIGHTLIES_PYTORCH_ROOT%" pytorch\ - cd pytorch - goto submodule - ) -) - -git clone https://github.com/%PYTORCH_REPO%/%MODULE_NAME% - -cd %MODULE_NAME% - -IF NOT "%BUILD_CSPRNG%" == "" goto latest_end - -IF "%PYTORCH_BRANCH%" == "latest" ( goto latest_start ) else ( goto latest_end ) - -:latest_start - -if "%NIGHTLIES_DATE%" == "" ( goto date_start ) else ( goto date_end ) - -:date_start - -set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'" -set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'" - -FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i -FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i - -:date_end - -if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2% - -:: Switch to the latest commit by 11:59 yesterday -echo PYTORCH_BRANCH is set to latest so I will find the last commit -echo before 0:00 midnight on %NIGHTLIES_DATE% -set git_date=%NIGHTLIES_DATE:_=-% -FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i -echo Setting PYTORCH_BRANCH to %last_commit% since that was the last -echo commit before %NIGHTLIES_DATE% -set PYTORCH_BRANCH=%last_commit% - -:latest_end - -IF "%PYTORCH_BRANCH%" == "" ( - set PYTORCH_BRANCH=v%TORCHCSPRNG_BUILD_VERSION% -) -git checkout %PYTORCH_BRANCH% -IF ERRORLEVEL 1 git checkout tags/%PYTORCH_BRANCH% - -:submodule - -git submodule update --init --recursive -IF ERRORLEVEL 1 exit /b 1 diff --git a/packaging/windows/internal/copy.bat b/packaging/windows/internal/copy.bat deleted file 
mode 100644 index b4aa397..0000000 --- a/packaging/windows/internal/copy.bat +++ /dev/null @@ -1,13 +0,0 @@ -copy "%CUDA_PATH%\bin\cusparse64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cublas64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cudart64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\curand64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cufft64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cufftw64_%CUDA_VERSION%.dll*" pytorch\torch\lib - -copy "%CUDA_PATH%\bin\cudnn64_%CUDNN_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\nvrtc64_%CUDA_VERSION%*.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\nvrtc-builtins64_%CUDA_VERSION%.dll*" pytorch\torch\lib - -copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" pytorch\torch\lib -copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib diff --git a/packaging/windows/internal/copy_cpu.bat b/packaging/windows/internal/copy_cpu.bat deleted file mode 100644 index f5b9d11..0000000 --- a/packaging/windows/internal/copy_cpu.bat +++ /dev/null @@ -1 +0,0 @@ -copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib diff --git a/packaging/windows/internal/cuda_install.bat b/packaging/windows/internal/cuda_install.bat deleted file mode 100644 index 9ca08e1..0000000 --- a/packaging/windows/internal/cuda_install.bat +++ /dev/null @@ -1,201 +0,0 @@ -@echo on - -if "%CU_VERSION%" == "cpu" ( - echo Skipping for CPU builds - exit /b 0 -) - -set SRC_DIR=%~dp0\.. 
- -if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build" - -set /a CUDA_VER=%CU_VERSION:cu=% -set CUDA_VER_MAJOR=%CUDA_VER:~0,-1% -set CUDA_VER_MINOR=%CUDA_VER:~-1,1% -set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR% - -if %CUDA_VER% EQU 92 goto cuda92 -if %CUDA_VER% EQU 100 goto cuda100 -if %CUDA_VER% EQU 101 goto cuda101 -if %CUDA_VER% EQU 102 goto cuda102 -if %CUDA_VER% EQU 110 goto cuda110 -if %CUDA_VER% EQU 111 goto cuda111 -if %CUDA_VER% EQU 112 goto cuda112 - -echo CUDA %CUDA_VERSION_STR% is not supported -exit /b 1 - -:cuda92 -if not exist "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_9.2.148_win10.exe --output "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" - set "ARGS=nvcc_9.2 cuobjdump_9.2 nvprune_9.2 cupti_9.2 cublas_9.2 cublas_dev_9.2 cudart_9.2 cufft_9.2 cufft_dev_9.2 curand_9.2 curand_dev_9.2 cusolver_9.2 cusolver_dev_9.2 cusparse_9.2 cusparse_dev_9.2 nvgraph_9.2 nvgraph_dev_9.2 npp_9.2 npp_dev_9.2 nvrtc_9.2 nvrtc_dev_9.2 nvml_dev_9.2" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-9.2-windows10-x64-v7.2.1.38.zip --output "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" -) - -goto cuda_common - -:cuda100 - -if not exist "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_10.0.130_411.31_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" - set "ARGS=nvcc_10.0 cuobjdump_10.0 nvprune_10.0 cupti_10.0 cublas_10.0 cublas_dev_10.0 cudart_10.0 cufft_10.0 cufft_dev_10.0 
curand_10.0 curand_dev_10.0 cusolver_10.0 cusolver_dev_10.0 cusparse_10.0 cusparse_dev_10.0 nvgraph_10.0 nvgraph_dev_10.0 npp_10.0 npp_dev_10.0 nvrtc_10.0 nvrtc_dev_10.0 nvml_dev_10.0" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-10.0-windows10-x64-v7.4.1.5.zip --output "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" -) - -goto cuda_common - -:cuda101 - -if not exist "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.1.243_426.00_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" - set "ARGS=nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip --output "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" -) - -goto cuda_common - -:cuda102 - -if not exist "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.2.89_441.22_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" - set "ARGS=nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 
cublas_dev_10.2 cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.2-windows10-x64-v7.6.5.32.zip --output "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" -) - -goto cuda_common - -:cuda110 - -if not exist "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.0.2_451.48_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" - set "ARGS=nvcc_11.0 cuobjdump_11.0 nvprune_11.0 nvprof_11.0 cupti_11.0 cublas_11.0 cublas_dev_11.0 cudart_11.0 cufft_11.0 cufft_dev_11.0 curand_11.0 curand_dev_11.0 cusolver_11.0 cusolver_dev_11.0 cusparse_11.0 cusparse_dev_11.0 npp_11.0 npp_dev_11.0 nvrtc_11.0 nvrtc_dev_11.0 nvml_dev_11.0" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.0-windows-x64-v8.0.4.30.zip --output "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" -) - -goto cuda_common - -:cuda111 - -if not exist "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.1.0_456.43_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" - set "ARGS=nvcc_11.1 cuobjdump_11.1 nvprune_11.1 
nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1" -) - -@REM There is no downloadable driver for Tesla on CUDA 11.1 yet. We will use -@REM the driver inside CUDA -if "%JOB_EXECUTOR%" == "windows-with-nvidia-gpu" set "ARGS=%ARGS% Display.Driver" - -if not exist "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.1-windows-x64-v8.0.5.39.zip --output "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" -) - -goto cuda_common - -:cuda112 - -if not exist "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" ( - curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.2.0_460.89_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" - if errorlevel 1 exit /b 1 - set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" - set "ARGS=nvcc_11.2 cuobjdump_11.2 nvprune_11.2 nvprof_11.2 cupti_11.2 cublas_11.2 cublas_dev_11.2 cudart_11.2 cufft_11.2 cufft_dev_11.2 curand_11.2 curand_dev_11.2 cusolver_11.2 cusolver_dev_11.2 cusparse_11.2 cusparse_dev_11.2 npp_11.2 npp_dev_11.2 nvrtc_11.2 nvrtc_dev_11.2 nvml_dev_11.2" -) - -if not exist "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" ( - curl -k -L http://s3.amazonaws.com/ossci-windows/cudnn-11.2-windows-x64-v8.1.0.77.zip --output "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" - if errorlevel 1 exit /b 1 - set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" -) - -goto cuda_common - -:cuda_common - -if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" ( - curl -k -L https://www.dropbox.com/s/9mcolalfdj4n979/NvToolsExt.7z?dl=1 --output "%SRC_DIR%\temp_build\NvToolsExt.7z" - if 
errorlevel 1 exit /b 1 -) - -if not exist "%SRC_DIR%\temp_build\gpu_driver_dlls.7z" ( - curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "%SRC_DIR%\temp_build\gpu_driver_dlls.zip" - if errorlevel 1 exit /b 1 -) - -echo Installing CUDA toolkit... -7z x %CUDA_SETUP_FILE% -o"%SRC_DIR%\temp_build\cuda" -pushd "%SRC_DIR%\temp_build\cuda" -start /wait setup.exe -s %ARGS% -popd - -echo Installing VS integration... -xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\Common7\IDE\VC\VCTargets\BuildCustomizations" - -echo Installing NvToolsExt... -7z x %SRC_DIR%\temp_build\NvToolsExt.7z -o"%SRC_DIR%\temp_build\NvToolsExt" -mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" -mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include" -mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64" -xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\bin\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" -xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\include\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include" -xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\lib\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64" - -echo Setting up environment... -set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\libnvvp;%PATH%" -set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%" -set "CUDA_PATH_V%CUDA_VER_MAJOR%_%CUDA_VER_MINOR%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%" -set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" - -if not exist "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" ( - echo CUDA %CUDA_VERSION_STR% installed failed. 
- exit /b 1 -) - -echo Installing cuDNN... -7z x %CUDNN_SETUP_FILE% -o"%SRC_DIR%\temp_build\cudnn" -xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\bin\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin" -xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\lib\x64\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\lib\x64" -xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\include\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\include" - -echo Installing GPU driver DLLs -7z x %SRC_DIR%\temp_build\gpu_driver_dlls.zip -o"C:\Windows\System32" - -echo Cleaning temp files -rd /s /q "%SRC_DIR%\temp_build" || ver > nul diff --git a/packaging/windows/internal/dep_install.bat b/packaging/windows/internal/dep_install.bat deleted file mode 100644 index db665a9..0000000 --- a/packaging/windows/internal/dep_install.bat +++ /dev/null @@ -1,14 +0,0 @@ -@echo off - -REM curl -k https://www.7-zip.org/a/7z1805-x64.exe -O -REM if errorlevel 1 exit /b 1 - -REM start /wait 7z1805-x64.exe /S -REM if errorlevel 1 exit /b 1 - -REM set "PATH=%ProgramFiles%\7-Zip;%PATH%" - -choco feature disable --name showDownloadProgress -choco feature enable --name allowGlobalConfirmation - -choco install curl 7zip diff --git a/packaging/windows/internal/env_fix.bat b/packaging/windows/internal/env_fix.bat deleted file mode 100644 index dd0aaf5..0000000 --- a/packaging/windows/internal/env_fix.bat +++ /dev/null @@ -1,31 +0,0 @@ -@echo off - -:: Caution: Please don't use this script locally -:: It may destroy your build environment. 
- -setlocal - -IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere - -IF "%VS15VCVARSALL%"=="" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -call "%VS15VCVARSALL%" x86_amd64 -for /f "usebackq tokens=*" %%i in (`where link.exe`) do move "%%i" "%%i.bak" - -endlocal diff --git a/packaging/windows/internal/nightly_defaults.bat b/packaging/windows/internal/nightly_defaults.bat deleted file mode 100644 index 2b5ca5c..0000000 --- a/packaging/windows/internal/nightly_defaults.bat +++ /dev/null @@ -1,200 +0,0 @@ -@echo on - -if "%~1"=="" goto arg_error -if NOT "%~2"=="" goto arg_error -goto arg_end - -:arg_error - -echo Illegal number of parameters. Pass packge type `Conda` or `Wheels`. -exit /b 1 - -:arg_end - -echo "nightly_defaults.bat at %CD% starting at %DATE%" - -set SRC_DIR=%~dp0\.. - -:: NIGHTLIES_FOLDER -:: N.B. this is also defined in cron_start.sh -:: An arbitrary root folder to store all nightlies folders, each of which is a -:: parent level date folder with separate subdirs for logs, wheels, conda -:: packages, etc. This should be kept the same across all scripts called in a -:: cron job, so it only has a default value in the top-most script -:: build_cron.sh to avoid the default values from diverging. -if "%NIGHTLIES_FOLDER%" == "" set "NIGHTLIES_FOLDER=%SRC_DIR%" - -:: NIGHTLIES_DATE -:: N.B. this is also defined in cron_start.sh -:: The date in YYYY_mm_dd format that we are building for. 
If this is not -:: already set, then this will first try to find the date of the nightlies -:: folder that this builder repo exists in; e.g. if this script exists in -:: some_dir/2019_09_04/builder/cron/ then this will be set to 2019_09_04 (must -:: match YYYY_mm_dd). This is for convenience when debugging/uploading past -:: dates, so that you don't have to set NIGHTLIES_DATE yourself. If a date -:: folder cannot be found in that exact location, then this will default to -:: the current date. - - -if "%NIGHTLIES_DATE%" == "" ( goto date_start ) else ( goto date_end ) - -:date_start - -set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'" -set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'" - -FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i -FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i - -:date_end - -if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2% - -:: Used in lots of places as the root dir to store all conda/wheel/manywheel -:: packages as well as logs for the day -set today=%NIGHTLIES_FOLDER%\%NIGHTLIES_DATE% -mkdir "%today%" || ver >nul - - -::############################################################################# -:: Add new configuration variables below this line. 'today' should always be -:: defined ASAP to avoid weird errors -::############################################################################# - - -:: List of people to email when things go wrong. 
This is passed directly to -:: `mail -t` -:: TODO: Not supported yet -if "%NIGHTLIES_EMAIL_LIST%" == "" set NIGHTLIES_EMAIL_LIST=peterghost86@gmail.com - -:: PYTORCH_CREDENTIALS_FILE -:: A bash file that exports credentials needed to upload to aws and anaconda. -:: Needed variables are PYTORCH_ANACONDA_USERNAME, PYTORCH_ANACONDA_PASSWORD, -:: AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY. Or it can just export the AWS -:: keys and then prepend a logged-in conda installation to the path. -:: TODO: Not supported yet -if "%PYTORCH_CREDENTIALS_FILE%" == "" set PYTORCH_CREDENTIALS_FILE=/c/Users/administrator/nightlies/credentials.sh - -:: Location of the temporary miniconda that is downloaded to install conda-build -:: and aws to upload finished packages TODO this is messy to install this in -:: upload.sh and later use it in upload_logs.sh -if "%CONDA_UPLOADER_INSTALLATION%" == "" set "CONDA_UPLOADER_INSTALLATION=%today%\miniconda" - -:: N.B. BUILDER_REPO and BUILDER_BRANCH are both set in cron_start.sh, as that -:: is the script that actually clones the builder repo that /this/ script is -:: running from. -pushd "%SRC_DIR%\.." -set NIGHTLIES_BUILDER_ROOT=%CD% -popd - -:: The shared pytorch repo to be used by all builds -if "%NIGHTLIES_PYTORCH_ROOT%" == "" set "NIGHTLIES_PYTORCH_ROOT=%today%\csprng" - -:: PYTORCH_REPO -:: The Github org/user whose fork of Pytorch to check out (git clone -:: https://github.com//pytorch.git). This will always be cloned -:: fresh to build with. Default is 'pytorch' -if "%PYTORCH_REPO%" == "" set PYTORCH_REPO=pytorch - -:: PYTORCH_BRANCH -:: The branch of Pytorch to checkout for building (git checkout ). -:: This can either be the name of the branch (e.g. git checkout -:: my_branch_name) or can be a git commit (git checkout 4b2674n...). 
Default -:: is 'latest', which is a special term that signals to pull the last commit -:: before 0:00 midnight on the NIGHTLIES_DATE -if "%PYTORCH_BRANCH%" == "" set PYTORCH_BRANCH=nightly - -:: Clone the requested pytorch checkout -if exist "%NIGHTLIES_PYTORCH_ROOT%" ( goto clone_end ) else ( goto clone_start ) - -:clone_start - -git clone --recursive "https://github.com/%PYTORCH_REPO%/csprng.git" "%NIGHTLIES_PYTORCH_ROOT%" -pushd "%NIGHTLIES_PYTORCH_ROOT%" - -if "%PYTORCH_BRANCH%" == "latest" ( goto latest_start ) else ( goto latest_end ) - -:latest_start - -:: Switch to the latest commit by 11:59 yesterday -echo PYTORCH_BRANCH is set to latest so I will find the last commit -echo before 0:00 midnight on %NIGHTLIES_DATE% -set git_date=%NIGHTLIES_DATE:_=-% -FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i -echo Setting PYTORCH_BRANCH to %last_commit% since that was the last -echo commit before %NIGHTLIES_DATE% -set PYTORCH_BRANCH=%last_commit% - -:latest_end - -git checkout "%PYTORCH_BRANCH%" -git submodule update -popd - -:clone_end - -if "%CUDA_VERSION%" == "cpu" ( - set _DESIRED_CUDA=cpu -) else ( - set _DESIRED_CUDA=cu%CUDA_VERSION% -) - -:: PYTORCH_BUILD_VERSION -:: The actual version string. Used in conda like -:: pytorch-nightly==1.0.0.dev20180908 -:: or in manylinux like -:: torch_nightly-1.0.0.dev20180908-cp27-cp27m-linux_x86_64.whl -if "%TORCHCSPRNG_BUILD_VERSION%" == "" set TORCHCSPRNG_BUILD_VERSION=0.9.0.dev%NIGHTLIES_DATE_COMPACT% - -if "%~1" == "Wheels" ( - if not "%CUDA_VERSION%" == "102" ( - set TORCHCSPRNG_BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION%+%_DESIRED_CUDA% - ) -) - -:: PYTORCH_BUILD_NUMBER -:: This is usually the number 1. If more than one build is uploaded for the -:: same version/date, then this can be incremented to 2,3 etc in which case -:: '.post2' will be appended to the version string of the package. 
This can -:: be set to '0' only if OVERRIDE_PACKAGE_VERSION is being used to bypass -:: all the version string logic in downstream scripts. Since we use the -:: override below, exporting this shouldn't actually matter. -if "%TORCHCSPRNG_BUILD_NUMBER%" == "" set /a TORCHCSPRNG_BUILD_NUMBER=1 -if %TORCHCSPRNG_BUILD_NUMBER% GTR 1 set TORCHCSPRNG_BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION%%TORCHCSPRNG_BUILD_NUMBER% - -:: The nightly builds use their own versioning logic, so we override whatever -:: logic is in setup.py or other scripts -:: TODO: Not supported yet -set OVERRIDE_PACKAGE_VERSION=%TORCHCSPRNG_BUILD_VERSION% -set BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION% - -:: Build folder for conda builds to use -if "%TORCH_CONDA_BUILD_FOLDER%" == "" set TORCH_CONDA_BUILD_FOLDER=torchcsprng - -:: TORCH_PACKAGE_NAME -:: The name of the package to upload. This should probably be pytorch or -:: pytorch-nightly. N.B. that pip will change all '-' to '_' but conda will -:: not. This is dealt with in downstream scripts. -:: TODO: Not supported yet -if "%TORCH_PACKAGE_NAME%" == "" set TORCH_PACKAGE_NAME=torchcsprng - -:: PIP_UPLOAD_FOLDER should end in a slash. This is to handle it being empty -:: (when uploading to e.g. whl/cpu/) and also to handle nightlies (when -:: uploading to e.g. /whl/nightly/cpu) -:: TODO: Not supported yet -if "%PIP_UPLOAD_FOLDER%" == "" set "PIP_UPLOAD_FOLDER=nightly\" - -:: The location of the binary_sizes dir in s3 is hardcoded into -:: upload_binary_sizes.sh - -:: DAYS_TO_KEEP -:: How many days to keep around for clean.sh. Build folders older than this -:: will be purged at the end of cron jobs. '1' means to keep only the current -:: day. Values less than 1 are not allowed. The default is 5. -:: TODO: Not supported yet -if "%DAYS_TO_KEEP%" == "" set /a DAYS_TO_KEEP=5 -if %DAYS_TO_KEEP% LSS 1 ( - echo DAYS_TO_KEEP cannot be less than 1. 
- echo A value of 1 means to only keep the build for today - exit /b 1 -) diff --git a/packaging/windows/internal/publish.bat b/packaging/windows/internal/publish.bat deleted file mode 100644 index 7e820d7..0000000 --- a/packaging/windows/internal/publish.bat +++ /dev/null @@ -1,89 +0,0 @@ -@echo off - -set SRC_DIR=%~dp0 -pushd %SRC_DIR% - -if NOT "%CUDA_VERSION%" == "cpu" ( - set PACKAGE_SUFFIX=_cuda%CUDA_VERSION% -) else ( - set PACKAGE_SUFFIX= -) - -if "%PACKAGEFULLNAME%" == "Conda" ( - set PACKAGE=conda -) else ( - set PACKAGE=wheels -) - -if not defined PACKAGE_SUFFIX ( - set PUBLISH_BRANCH=csprng_%PACKAGE%_%DESIRED_PYTHON% -) else ( - set PUBLISH_BRANCH=csprng_%PACKAGE%_%DESIRED_PYTHON%%PACKAGE_SUFFIX% -) - -git clone %ARTIFACT_REPO_URL% -b %PUBLISH_BRANCH% --single-branch >nul 2>&1 - -IF ERRORLEVEL 1 ( - echo Branch %PUBLISH_BRANCH% not exist, falling back to master - set NO_BRANCH=1 - git clone %ARTIFACT_REPO_URL% -b master --single-branch >nul 2>&1 -) - -IF ERRORLEVEL 1 ( - echo Clone failed - goto err -) - -cd pytorch_builder -attrib -s -h -r . /s /d - -:: Empty repo -rd /s /q . || ver >nul - -IF NOT EXIST %PACKAGE% mkdir %PACKAGE% - -xcopy /S /E /Y ..\..\output\*.* %PACKAGE%\ - -git config --global user.name "Azure DevOps" -git config --global user.email peterghost86@gmail.com -git init -git checkout --orphan %PUBLISH_BRANCH% -git remote add origin %ARTIFACT_REPO_URL% -git add . 
-git commit -m "Update artifacts" - -:push - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -git push origin %PUBLISH_BRANCH% -f > nul 2>&1 - -IF ERRORLEVEL 1 ( - echo Git push retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Push failed - goto err - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto push -) ELSE ( - set RETRY_TIMES= - set SLEEP_TIME= -) - -popd - -exit /b 0 - -:err - -popd - -exit /b 1 diff --git a/packaging/windows/internal/setup.bat b/packaging/windows/internal/setup.bat deleted file mode 100644 index 96cb7fb..0000000 --- a/packaging/windows/internal/setup.bat +++ /dev/null @@ -1,44 +0,0 @@ -@echo off - -echo The flags after configuring: -echo NO_CUDA=%NO_CUDA% -echo CMAKE_GENERATOR=%CMAKE_GENERATOR% -if "%NO_CUDA%"=="" echo CUDA_PATH=%CUDA_PATH% -if NOT "%CC%"=="" echo CC=%CC% -if NOT "%CXX%"=="" echo CXX=%CXX% -if NOT "%DISTUTILS_USE_SDK%"=="" echo DISTUTILS_USE_SDK=%DISTUTILS_USE_SDK% - -set SRC_DIR=%~dp0\.. - -IF "%VSDEVCMD_ARGS%" == "" ( - call "%VS15VCVARSALL%" x64 -) ELSE ( - call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% -) - -pushd %SRC_DIR% - -IF NOT exist "setup.py" ( - cd %MODULE_NAME% -) - -if "%CXX%"=="sccache cl" ( - sccache --stop-server - sccache --start-server - sccache --zero-stats -) - -:pytorch -:: This stores in e.g. D:/_work/1/s/windows/output/cpu -pip wheel -e . --no-deps --wheel-dir ../output - -:build_end -IF ERRORLEVEL 1 exit /b 1 -IF NOT ERRORLEVEL 0 exit /b 1 - -if "%CXX%"=="sccache cl" ( - taskkill /im sccache.exe /f /t || ver > nul - taskkill /im nvcc.exe /f /t || ver > nul -) - -cd .. 
diff --git a/packaging/windows/internal/test.bat b/packaging/windows/internal/test.bat deleted file mode 100644 index 8e6878b..0000000 --- a/packaging/windows/internal/test.bat +++ /dev/null @@ -1,79 +0,0 @@ -@echo off - -set SRC_DIR=%~dp0\.. -pushd %SRC_DIR% - -set PYTHON_VERSION=%PYTHON_PREFIX:py=cp% - -if "%BUILD_CSPRNG%" == "" ( - pip install future pytest coverage hypothesis protobuf -) ELSE ( - pip install future pytest "pillow>=4.1.1" -) - -for /F "delims=" %%i in ('where /R %SRC_DIR%\output *%MODULE_NAME%*%PYTHON_VERSION%*.whl') do pip install "%%i" - -if ERRORLEVEL 1 exit /b 1 - -if NOT "%BUILD_CSPRNG%" == "" ( - echo Smoke testing imports - python -c "import torchcsprng" - if ERRORLEVEL 1 exit /b 1 - goto smoke_test_end -) - -echo Smoke testing imports -python -c "import torch" -if ERRORLEVEL 1 exit /b 1 - -python -c "from caffe2.python import core" -if ERRORLEVEL 1 exit /b 1 - -echo Checking that MKL is available -python -c "import torch; exit(0 if torch.backends.mkl.is_available() else 1)" -if ERRORLEVEL 1 exit /b 1 - -setlocal EnableDelayedExpansion -set NVIDIA_GPU_EXISTS=0 -for /F "delims=" %%i in ('wmic path win32_VideoController get name') do ( - set GPUS=%%i - if not "x!GPUS:NVIDIA=!" == "x!GPUS!" ( - SET NVIDIA_GPU_EXISTS=1 - goto gpu_check_end - ) -) -:gpu_check_end -endlocal & set NVIDIA_GPU_EXISTS=%NVIDIA_GPU_EXISTS% - -if NOT "%CUDA_PREFIX%" == "cpu" if "%NVIDIA_GPU_EXISTS%" == "1" ( - echo Checking that CUDA archs are setup correctly - python -c "import torch; torch.randn([3,5]).cuda()" - if ERRORLEVEL 1 exit /b 1 - - echo Checking that magma is available - python -c "import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)" - if ERRORLEVEL 1 exit /b 1 - - echo Checking that CuDNN is available - python -c "import torch; exit(0 if torch.backends.cudnn.is_available() else 1)" - if ERRORLEVEL 1 exit /b 1 -) -:smoke_test_end - -echo Not running unit tests. 
Hopefully these problems are caught by CI -goto test_end - -if "%BUILD_CSPRNG%" == "" ( - cd pytorch\test - python run_test.py -v -) else ( - cd csprng - pytest . -) - -if ERRORLEVEL 1 exit /b 1 - -:test_end - -popd -exit /b 0 diff --git a/packaging/windows/internal/upload.bat b/packaging/windows/internal/upload.bat deleted file mode 100644 index f78fe0b..0000000 --- a/packaging/windows/internal/upload.bat +++ /dev/null @@ -1,96 +0,0 @@ -@echo off - -IF "%CONDA_UPLOADER_INSTALLATION%" == "" goto precheck_fail -IF "%PYTORCH_FINAL_PACKAGE_DIR%" == "" goto precheck_fail -IF "%today%" == "" goto precheck_fail -IF "%PYTORCH_ANACONDA_USERNAME%" == "" goto precheck_fail -IF "%PYTORCH_ANACONDA_PASSWORD%" == "" goto precheck_fail - -goto precheck_pass - -:precheck_fail - -echo Please run nightly_defaults.bat first. -echo And remember to set `PYTORCH_FINAL_PACKAGE_DIR` -echo Finally, don't forget to set anaconda tokens -exit /b 1 - -:precheck_pass - -pushd %today% - -:: Install anaconda client -set "CONDA_HOME=%CONDA_UPLOADER_INSTALLATION%" -set "tmp_conda=%CONDA_HOME%" -set "miniconda_exe=%CD%\miniconda.exe" -rmdir /s /q "%CONDA_HOME%" -del miniconda.exe -curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" -popd - -IF ERRORLEVEL 1 ( - echo Conda download failed - exit /b 1 -) - -call %~dp0\..\..\conda\install_conda.bat - -IF ERRORLEVEL 1 ( - echo Conda installation failed - exit /b 1 -) - -set "ORIG_PATH=%PATH%" -set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" - -REM conda install -y anaconda-client -pip install git+https://github.com/peterjc123/anaconda-client.git@log_more_meaningfull_errors -IF ERRORLEVEL 1 ( - echo Anaconda client installation failed - exit /b 1 -) - -set PYTORCH_FINAL_PACKAGE= -:: Upload all the packages under `PYTORCH_FINAL_PACKAGE_DIR` -FOR /F "delims=" %%i IN ('where /R %PYTORCH_FINAL_PACKAGE_DIR% *csprng*.tar.bz2') DO ( - set "PYTORCH_FINAL_PACKAGE=%%i" -) - -IF 
"%PYTORCH_FINAL_PACKAGE%" == "" ( - echo No package to upload - exit /b 0 -) - -:upload - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -REM bash -c "yes | anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%"" -anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%" -IF ERRORLEVEL 1 ( - echo Anaconda client login failed - exit /b 1 -) - -echo Uploading %PYTORCH_FINAL_PACKAGE% to Anaconda Cloud -anaconda upload "%PYTORCH_FINAL_PACKAGE%" -u pytorch-nightly --label main --force --no-progress - -IF ERRORLEVEL 1 ( - echo Anaconda upload retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Upload failed - exit /b 1 - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto upload -) ELSE ( - set RETRY_TIMES= - set SLEEP_TIME= -) diff --git a/packaging/windows/internal/vc_env_helper.bat b/packaging/windows/internal/vc_env_helper.bat deleted file mode 100644 index e85a372..0000000 --- a/packaging/windows/internal/vc_env_helper.bat +++ /dev/null @@ -1,43 +0,0 @@ -@echo on - -set VC_VERSION_LOWER=16 -set VC_VERSION_UPPER=17 -if "%VC_YEAR%" == "2017" ( - set VC_VERSION_LOWER=15 - set VC_VERSION_UPPER=16 -) - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere -if "%VSDEVCMD_ARGS%" == "" ( - call "%VS15VCVARSALL%" x64 || exit /b 1 -) else ( - call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 -) - -@echo on - -set DISTUTILS_USE_SDK=1 - -set args=%1 -shift 
-:start -if [%1] == [] goto done -set args=%args% %1 -shift -goto start - -:done -if "%args%" == "" ( - echo Usage: vc_env_helper.bat [command] [args] - echo e.g. vc_env_helper.bat cl /c test.cpp -) - -%args% || exit /b 1 diff --git a/packaging/windows/internal/vc_install_helper.sh b/packaging/windows/internal/vc_install_helper.sh deleted file mode 100644 index cdae180..0000000 --- a/packaging/windows/internal/vc_install_helper.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -set -ex - -if [[ "$CU_VERSION" == "cu92" ]]; then - export VC_YEAR=2017 - export VSDEVCMD_ARGS="-vcvars_ver=14.13" - powershell packaging/windows/internal/vs2017_install.ps1 -elif [[ "$CU_VERSION" == "cu100" ]]; then - export VC_YEAR=2017 - export VSDEVCMD_ARGS="" - powershell packaging/windows/internal/vs2017_install.ps1 -else - export VC_YEAR=2019 - export VSDEVCMD_ARGS="" -fi diff --git a/packaging/windows/internal/vs2017_install.ps1 b/packaging/windows/internal/vs2017_install.ps1 deleted file mode 100644 index 3e953de..0000000 --- a/packaging/windows/internal/vs2017_install.ps1 +++ /dev/null @@ -1,25 +0,0 @@ -$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe" -$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", - "--add Microsoft.VisualStudio.Component.VC.Tools.14.13", - "--add Microsoft.Component.MSBuild", - "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", - "--add Microsoft.VisualStudio.Component.TextTemplating", - "--add Microsoft.VisualStudio.Component.VC.CoreIde", - "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest", - "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core", - "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64", - "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81") - -curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe -if ($LASTEXITCODE -ne 0) { - echo "Download of the VS 2017 installer failed" - exit 1 -} - -$process = Start-Process 
"${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru -Remove-Item -Path vs_installer.exe -Force -$exitCode = $process.ExitCode -if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { - echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]." - exit 1 -} diff --git a/packaging/windows/internal/vs2019_install.ps1 b/packaging/windows/internal/vs2019_install.ps1 deleted file mode 100644 index e436051..0000000 --- a/packaging/windows/internal/vs2019_install.ps1 +++ /dev/null @@ -1,21 +0,0 @@ -$VS_DOWNLOAD_LINK = "https://aka.ms/vs/16/release/vs_buildtools.exe" -$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", - "--add Microsoft.Component.MSBuild", - "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", - "--add Microsoft.VisualStudio.Component.VC.CoreBuildTools", - "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest", - "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64") - -curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe -if ($LASTEXITCODE -ne 0) { - echo "Download of the VS 2019 installer failed" - exit 1 -} - -$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru -Remove-Item -Path vs_installer.exe -Force -$exitCode = $process.ExitCode -if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { - echo "VS 2019 installer exited with code $exitCode, which should be one of [0, 3010]." 
- exit 1 -} diff --git a/packaging/windows/internal/vs_install.bat b/packaging/windows/internal/vs_install.bat deleted file mode 100644 index 348a5e3..0000000 --- a/packaging/windows/internal/vs_install.bat +++ /dev/null @@ -1,14 +0,0 @@ -@echo off - -set VS_DOWNLOAD_LINK=https://aka.ms/vs/15/release/vs_enterprise.exe -set VS_INSTALL_PATH=C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise -set VS_INSTALL_ARGS=--nocache --quiet --wait --add Microsoft.VisualStudio.Component.VC.Tools.14.11 -set VSDEVCMD_ARGS=-vcvars_ver=14.11 - -curl -k -L %VS_DOWNLOAD_LINK% --output vs_installer.exe -if errorlevel 1 exit /b 1 - -start /wait vs_installer.exe modify --installPath "%VS_INSTALL_PATH%" %VS_INSTALL_ARGS% -if not errorlevel 0 exit /b 1 -if errorlevel 1 if not errorlevel 3010 exit /b 1 -if errorlevel 3011 exit /b 1 diff --git a/packaging/windows/old/cuda100.bat b/packaging/windows/old/cuda100.bat deleted file mode 100644 index f088bca..0000000 --- a/packaging/windows/old/cuda100.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. 
- IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V10_0%"=="" ( - echo CUDA 10.0 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_CSPRNG%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V10_0%" - set "PATH=%CUDA_PATH_V10_0%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/old/cuda90.bat b/packaging/windows/old/cuda90.bat deleted file mode 100644 index 520b794..0000000 --- a/packaging/windows/old/cuda90.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_CSPRNG%" == "" ( - set MODULE_NAME=csprng -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. 
- IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V9_0%"=="" ( - echo CUDA 9 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_CSPRNG%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;7.0 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V9_0%" - set "PATH=%CUDA_PATH_V9_0%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_CSPRNG%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/templates/auth_task.yml b/packaging/windows/templates/auth_task.yml deleted file mode 100644 index 7554ffa..0000000 --- a/packaging/windows/templates/auth_task.yml +++ /dev/null @@ -1,17 +0,0 @@ -jobs: -- job: 'VSTS_Auth_Task' - timeoutInMinutes: 5 - cancelTimeoutInMinutes: 5 - variables: - - group: 'peterjc-vsts-token' - - pool: - vmImage: 'vs2017-win2016' - - steps: - - checkout: self - clean: true - - - template: vsts_auth.yml - parameters: - auth: $(vsts_auth) diff --git a/packaging/windows/templates/build_conda.yml b/packaging/windows/templates/build_conda.yml deleted file mode 100644 index ce29c06..0000000 --- a/packaging/windows/templates/build_conda.yml +++ /dev/null @@ -1,15 +0,0 @@ -parameters: - msagent: false - -steps: -- bash: 'find . 
-name "*.sh" -exec dos2unix {} +' - displayName: Replace file endings - -- script: 'if not exist %PYTORCH_FINAL_PACKAGE_DIR% mkdir %PYTORCH_FINAL_PACKAGE_DIR%' - displayName: 'Create final package directory' - -- bash: './packaging/conda/build_csprng.sh $CUDA_VERSION $TORCHCSPRNG_BUILD_VERSION $TORCHCSPRNG_BUILD_NUMBER' - displayName: Build - env: - ${{ if eq(parameters.msagent, 'true') }}: - MAX_JOBS: 2 diff --git a/packaging/windows/templates/build_task.yml b/packaging/windows/templates/build_task.yml deleted file mode 100644 index 18d4f8e..0000000 --- a/packaging/windows/templates/build_task.yml +++ /dev/null @@ -1,173 +0,0 @@ -parameters: - package: '' - spec: '' - jobDesc: '' - packageDesc: '' - msagent: true - cpuEnabled: true - cudaEnabled: true - condaEnabled: true - wheelsEnabled: true - override: false - -jobs: -- job: 'Windows_${{ parameters.spec }}_${{ parameters.package }}_Build' - timeoutInMinutes: 60 - cancelTimeoutInMinutes: 5 - condition: > - or(and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CPU'), - eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CPU'), - eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CUDA'), - eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CUDA'), - eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true'))) - variables: - - ${{ if eq(parameters.override, 'true') }}: - - name: TORCHCSPRNG_BUILD_NUMBER - value: 1 - - name: PYTORCH_REPO - value: 'pytorch' - - name: PYTORCH_BRANCH - value: 'v0.4.0' - - ${{ if eq(parameters.msagent, 'true') }}: - - name: USE_SCCACHE - value: 0 - - ${{ if eq(parameters.msagent, 
'false') }}: - - name: USE_SCCACHE - value: 1 - - ${{ if eq(parameters.package, 'Conda') }}: - - group: peterjc_anaconda_token - - name: PYTORCH_FINAL_PACKAGE_DIR - value: '$(Build.Repository.LocalPath)\packaging\windows\output' - - strategy: - maxParallel: 10 - matrix: - ${{ if eq(parameters.spec, 'CPU') }}: - PY3.5: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: cpu - PY3.6: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: cpu - PY3.7: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: cpu - PY3.8: - DESIRED_PYTHON: 3.8 - CUDA_VERSION: cpu - PY3.9: - DESIRED_PYTHON: 3.9 - CUDA_VERSION: cpu - ${{ if ne(parameters.spec, 'CPU') }}: - PY3.5_92: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: 92 - PY3.6_92: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: 92 - PY3.7_92: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: 92 - PY3.8_92: - DESIRED_PYTHON: 3.8 - CUDA_VERSION: 92 - PY3.9_92: - DESIRED_PYTHON: 3.9 - CUDA_VERSION: 92 - PY3.5_101: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: 101 - PY3.6_101: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: 101 - PY3.7_101: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: 101 - PY3.8_101: - DESIRED_PYTHON: 3.8 - CUDA_VERSION: 101 - PY3.9_101: - DESIRED_PYTHON: 3.9 - CUDA_VERSION: 101 - PY3.5_102: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: 102 - PY3.6_102: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: 102 - PY3.7_102: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: 102 - PY3.8_102: - DESIRED_PYTHON: 3.8 - CUDA_VERSION: 102 - PY3.9_102: - DESIRED_PYTHON: 3.9 - CUDA_VERSION: 102 - - pool: - ${{ if eq(parameters.msagent, 'true') }}: - vmImage: 'vs2017-win2016' - ${{ if eq(parameters.msagent, 'false') }}: - name: 'release' - - steps: - - checkout: self - clean: true - - - template: setup_env_for_msagent.yml - parameters: - msagent: ${{ parameters.msagent }} - - # - ${{ if and(eq(parameters.override, 'true'), eq(parameters.package, 'Wheels')) }}: - # - template: override_pytorch_version.yml - - - template: setup_nightly_variables.yml - parameters: - package: ${{ parameters.package }} - - - ${{ if eq(parameters.package, 'Wheels') }}: - - template: 
build_wheels.yml - parameters: - msagent: ${{ parameters.msagent }} - - - ${{ if eq(parameters.package, 'Conda') }}: - - template: build_conda.yml - parameters: - msagent: ${{ parameters.msagent }} - - - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}: - - template: publish_test_results.yml - parameters: - msagent: ${{ parameters.msagent }} - - # If you want to upload binaries to S3 & Anaconda Cloud, please uncomment this section. - - ${{ if and(eq(parameters.package, 'Wheels'), eq(parameters.spec, 'CPU')) }}: - - template: upload_to_s3.yml - parameters: - cuVer: '$(CUDA_VERSION)' - cudaVer: '$(CUDA_VERSION)' - - - ${{ if and(eq(parameters.package, 'Wheels'), ne(parameters.spec, 'CPU')) }}: - - template: upload_to_s3.yml - parameters: - cuVer: 'cu$(CUDA_VERSION)' - cudaVer: 'cuda$(CUDA_VERSION)' - - - ${{ if eq(parameters.package, 'Conda') }}: - - template: upload_to_conda.yml - parameters: - user: $(peterjc_conda_username) - pass: $(peterjc_conda_password) - - # If you want to upload binaries to Azure Git, please uncomment this section. 
- # - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}: - # - template: publish_test_results.yml - # parameters: - # msagent: ${{ parameters.msagent }} - # - template: publish_packages.yml - # parameters: - # package: ${{ parameters.package }} diff --git a/packaging/windows/templates/build_wheels.yml b/packaging/windows/templates/build_wheels.yml deleted file mode 100644 index 8393fdb..0000000 --- a/packaging/windows/templates/build_wheels.yml +++ /dev/null @@ -1,9 +0,0 @@ -parameters: - msagent: false - -steps: -- script: 'call packaging/windows/build_csprng.bat %CUDA_VERSION% %TORCHCSPRNG_BUILD_VERSION% %TORCHCSPRNG_BUILD_NUMBER%' - displayName: Build - env: - ${{ if eq(parameters.msagent, 'true') }}: - MAX_JOBS: 2 diff --git a/packaging/windows/templates/linux_build_task.yml b/packaging/windows/templates/linux_build_task.yml deleted file mode 100644 index 0b32892..0000000 --- a/packaging/windows/templates/linux_build_task.yml +++ /dev/null @@ -1,38 +0,0 @@ -parameters: - msagent: true - enabled: false - -jobs: -- job: 'Linux_CPU_Conda_Build' - timeoutInMinutes: 0 - cancelTimeoutInMinutes: 5 - condition: ${{ eq(parameters.enabled, 'true') }} - variables: - CUDA_VERSION: cpu - TORCH_CONDA_BUILD_FOLDER: pytorch-nightly - PYTORCH_FINAL_PACKAGE_DIR: '$(Build.Repository.LocalPath)/output' - - strategy: - maxParallel: 10 - matrix: - PY3.5: - DESIRED_PYTHON: 3.5 - - pool: - vmImage: 'ubuntu-16.04' - - steps: - - checkout: self - clean: true - - - script: 'sudo apt-get install p7zip-full' - displayName: 'Install 7Zip' - - - task: CondaEnvironment@1 - displayName: 'Install conda-build' - inputs: - packageSpecs: 'conda-build' - - - template: build_conda.yml - parameters: - msagent: ${{ parameters.msagent }} diff --git a/packaging/windows/templates/override_pytorch_version.yml b/packaging/windows/templates/override_pytorch_version.yml deleted file mode 100644 index 8af93ae..0000000 --- a/packaging/windows/templates/override_pytorch_version.yml 
+++ /dev/null @@ -1,6 +0,0 @@ -steps: -- script: 'windows/internal/override_pytorch_version.bat' - displayName: 'Override PyTorch Build Version for Wheels' - -- script: 'echo $(PYTORCH_BUILD_VERSION)' - displayName: 'Show PyTorch Build Version' diff --git a/packaging/windows/templates/publish_packages.yml b/packaging/windows/templates/publish_packages.yml deleted file mode 100644 index 51ce824..0000000 --- a/packaging/windows/templates/publish_packages.yml +++ /dev/null @@ -1,8 +0,0 @@ -parameters: - package: '' - -steps: -- script: 'packaging/windows/internal/publish.bat' - displayName: 'Upload packages to Azure DevOps Repo' - env: - PACKAGEFULLNAME: ${{ parameters.package }} diff --git a/packaging/windows/templates/publish_test_results.yml b/packaging/windows/templates/publish_test_results.yml deleted file mode 100644 index 1e0dc02..0000000 --- a/packaging/windows/templates/publish_test_results.yml +++ /dev/null @@ -1,6 +0,0 @@ -steps: -- task: PublishTestResults@2 # No test results to publish - inputs: - testResultsFiles: 'windows/pytorch/test/**/*.xml' - testRunTitle: 'Publish test results' - enabled: false diff --git a/packaging/windows/templates/setup_env_for_msagent.yml b/packaging/windows/templates/setup_env_for_msagent.yml deleted file mode 100644 index 377734f..0000000 --- a/packaging/windows/templates/setup_env_for_msagent.yml +++ /dev/null @@ -1,25 +0,0 @@ -parameters: - msagent: false - -steps: -- ${{ if eq(parameters.msagent, 'true') }}: - - task: BatchScript@1 - displayName: 'Install 7Zip & cURL' - inputs: - filename: 'packaging/windows/internal/dep_install.bat' - - modifyEnvironment: true - - - task: BatchScript@1 - displayName: 'Install Visual Studio 2017' - inputs: - filename: 'packaging/windows/internal/vs_install.bat' - - modifyEnvironment: true - - - task: BatchScript@1 - displayName: 'Install CUDA' - inputs: - filename: 'packaging/windows/internal/cuda_install.bat' - - modifyEnvironment: true diff --git 
a/packaging/windows/templates/setup_nightly_variables.yml b/packaging/windows/templates/setup_nightly_variables.yml deleted file mode 100644 index 94b2fe9..0000000 --- a/packaging/windows/templates/setup_nightly_variables.yml +++ /dev/null @@ -1,11 +0,0 @@ -parameters: - package: '' - -steps: -- task: BatchScript@1 - displayName: 'Setup nightly variables' - inputs: - filename: 'packaging/windows/internal/nightly_defaults.bat' - arguments: ${{ parameters.package }} - - modifyEnvironment: true diff --git a/packaging/windows/templates/upload_to_conda.yml b/packaging/windows/templates/upload_to_conda.yml deleted file mode 100644 index dc172bc..0000000 --- a/packaging/windows/templates/upload_to_conda.yml +++ /dev/null @@ -1,10 +0,0 @@ -parameters: - user: '' - pass: '' - -steps: -- script: 'call packaging/windows/internal/upload.bat' - displayName: 'Upload packages to Anaconda Cloud' - env: - PYTORCH_ANACONDA_USERNAME: ${{ parameters.user }} - PYTORCH_ANACONDA_PASSWORD: ${{ parameters.pass }} diff --git a/packaging/windows/templates/upload_to_s3.yml b/packaging/windows/templates/upload_to_s3.yml deleted file mode 100644 index 1de91b5..0000000 --- a/packaging/windows/templates/upload_to_s3.yml +++ /dev/null @@ -1,15 +0,0 @@ -parameters: - cuVer: '' - cudaVer: '' - -steps: -- task: AmazonWebServices.aws-vsts-tools.S3Upload.S3Upload@1 - displayName: 'Upload ${{ parameters.cuVer }} wheel to S3' - inputs: - awsCredentials: 'Pytorch S3 bucket' - bucketName: 'pytorch' - sourceFolder: 'packaging/windows/output' - globExpressions: '*.whl' - targetFolder: 'whl/nightly/${{ parameters.cuVer }}/' - filesAcl: 'public-read' - flattenFolders: 'true' diff --git a/packaging/windows/templates/vsts_auth.yml b/packaging/windows/templates/vsts_auth.yml deleted file mode 100644 index fde767d..0000000 --- a/packaging/windows/templates/vsts_auth.yml +++ /dev/null @@ -1,8 +0,0 @@ -parameters: - auth: '' - -steps: -- script: 'call packaging/windows/internal/auth.bat' - displayName: 'Sign in to 
Azure Pipelines' - env: - VSTS_AUTH: ${{ parameters.auth }} diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index b8c6945..0000000 --- a/test/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/test/test_csprng.py b/test/test_csprng.py deleted file mode 100644 index a85b7b8..0000000 --- a/test/test_csprng.py +++ /dev/null @@ -1,654 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -import os -import random -import time -import unittest - -import numpy as np -import torch -from Crypto.Cipher import AES -from Crypto.Util import Counter -from scipy import stats - -try: - import torchcsprng as csprng -except ImportError: - raise RuntimeError("CSPRNG not available") - -IS_SANDCASTLE = ( - os.getenv("SANDCASTLE") == "1" or os.getenv("TW_JOB_USER") == "sandcastle" -) -IS_FBCODE = os.getenv("PYTORCH_TEST_FBCODE") == "1" - - -def to_numpy(t, dtype=torch.float): - if t.dtype == torch.bfloat16: - t = t.to(dtype) - return t.numpy() - - -def to_bytes(t): - if t.dtype == torch.bfloat16: - t = t.view(torch.int16) - return t.cpu().numpy().view(np.int8) - - -class TestCSPRNG(unittest.TestCase): - - all_generators = [ - csprng.create_random_device_generator(), - csprng.create_random_device_generator("/dev/urandom"), - csprng.create_mt19937_generator(), - csprng.create_mt19937_generator(42), - ] - - int_dtypes = [torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64] - - standard_fp_dtypes = [torch.float, torch.double] - - non_standard_fp_dtypes = [torch.half, torch.bfloat16] - - fp_dtypes = standard_fp_dtypes + non_standard_fp_dtypes - - num_dtypes = 
int_dtypes + fp_dtypes - - all_dtypes = num_dtypes + [torch.bool] - - size = 1000 - - all_devices = ( - ["cpu", "cuda"] - if (torch.cuda.is_available() and csprng.supports_cuda()) - else ["cpu"] - ) - - def test_random_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.num_dtypes: - if dtype == torch.float: - to_inc = 2**24 - elif dtype == torch.double: - to_inc = 2**53 - elif dtype == torch.half: - to_inc = 2**11 - elif dtype == torch.bfloat16: - to_inc = 2**8 - else: - to_inc = torch.iinfo(dtype).max - - t = torch.empty(self.size, dtype=dtype, device=device).random_( - generator=gen - ) - res = stats.kstest( - to_numpy(t.cpu()), stats.randint.cdf, args=(0, to_inc) - ) - self.assertTrue(res.statistic < 0.1) - - no_cuda = not torch.cuda.is_available() or not csprng.supports_cuda() - - no_cuda_message = ( - "CUDA is not available or csprng was not compiled with CUDA support" - ) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_random_cpu_vs_cuda(self): - for dtype in self.num_dtypes: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").random_( - generator=gen - ) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device="cuda").random_( - generator=gen - ) - self.assertTrue((cpu_t == cuda_t.cpu()).all()) - - def test_random_to_kstest(self): - to_ = 42 - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.num_dtypes: - t = torch.zeros(self.size, dtype=dtype, device=device).random_( - to_, generator=gen - ) - res = stats.kstest( - to_numpy(t.cpu()), stats.randint.cdf, args=(0, to_) - ) - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_random_to_cpu_vs_cuda(self): - to_ = 42 - for dtype in self.num_dtypes: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.zeros(self.size, dtype=dtype, device="cpu").random_( - to_, generator=gen - 
) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.zeros(self.size, dtype=dtype, device="cuda").random_( - to_, generator=gen - ) - self.assertTrue((cpu_t == cuda_t.cpu()).all()) - - def test_random_from_to_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.num_dtypes: - for from_ in [0, 24, 42]: - for to_ in [42, 99, 123]: - if from_ < to_: - t = torch.zeros( - self.size, dtype=dtype, device=device - ).random_(from_, to_, generator=gen) - res = stats.kstest( - to_numpy(t.cpu()), - stats.randint.cdf, - args=(from_, to_), - ) - self.assertTrue(res.statistic < 0.2) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_random_from_to_cpu_vs_cuda(self): - for dtype in self.num_dtypes: - for from_ in [0, 24, 42]: - for to_ in [42, 99, 123]: - if from_ < to_: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.zeros( - self.size, dtype=dtype, device="cpu" - ).random_(from_, to_, generator=gen) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.zeros( - self.size, dtype=dtype, device="cuda" - ).random_(from_, to_, generator=gen) - self.assertTrue((cpu_t == cuda_t.cpu()).all()) - - def test_random_bool(self): - for device in self.all_devices: - for gen in self.all_generators: - t = torch.empty(self.size, dtype=torch.bool, device=device) - - t.fill_(False) - t.random_(generator=gen) - self.assertEqual(t.min(), False) - self.assertEqual(t.max(), True) - self.assertTrue( - 0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6 - ) - - t.fill_(True) - t.random_(generator=gen) - self.assertEqual(t.min(), False) - self.assertEqual(t.max(), True) - self.assertTrue( - 0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6 - ) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_random_bool_cpu_vs_cuda(self): - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=torch.bool, device="cpu").random_( - generator=gen - ) - gen = 
csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=torch.bool, device="cuda").random_( - generator=gen - ) - self.assertTrue((cpu_t == cuda_t.cpu()).all()) - - def test_uniform_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for from_ in [-42, 0, 4.2]: - for to_ in [-4.2, 0, 42]: - if to_ > from_: - t = torch.empty( - self.size, dtype=dtype, device=device - ).uniform_(from_, to_, generator=gen) - res = stats.kstest( - to_numpy(t.cpu(), torch.double), - "uniform", - args=(from_, (to_ - from_)), - ) - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_uniform_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - for from_ in [-42, 0, 4.2]: - for to_ in [-4.2, 0, 42]: - if to_ > from_: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty( - self.size, dtype=dtype, device="cpu" - ).uniform_(from_, to_, generator=gen) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty( - self.size, dtype=dtype, device="cuda" - ).uniform_(from_, to_, generator=gen) - self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) - - def test_normal_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for mean in [-3, 0, 7]: - for std in [1, 5, 7]: - t = torch.empty( - self.size, dtype=dtype, device=device - ).normal_(mean=mean, std=std, generator=gen) - res = stats.kstest( - to_numpy(t.cpu(), torch.double), - "norm", - args=(mean, std), - ) - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_normal_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - for mean in [-3, 0, 7]: - for std in [1, 5, 7]: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").normal_( - mean=mean, std=std, generator=gen - ) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, 
dtype=dtype, device="cuda").normal_( - mean=mean, std=std, generator=gen - ) - self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) - - def test_log_normal_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for mean in [-3, 0, 7]: - for std in [1, 5, 7]: - t = torch.empty( - self.size, dtype=dtype, device=device - ).log_normal_(mean=mean, std=std, generator=gen) - res = stats.kstest( - to_numpy(t.cpu(), torch.double), - "lognorm", - args=(std, 0, math.exp(mean)), - ) - if dtype in [torch.half, torch.bfloat16]: - self.assertTrue(res.statistic < 0.4) - else: - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_log_normal_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - for mean in [-3, 0, 7]: - for std in [1, 5, 7]: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty( - self.size, dtype=dtype, device="cpu" - ).log_normal_(mean=mean, std=std, generator=gen) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty( - self.size, dtype=dtype, device="cuda" - ).log_normal_(mean=mean, std=std, generator=gen) - self.assertTrue( - torch.allclose(cpu_t, cuda_t.cpu(), 1e-4, equal_nan=True) - ) - - def test_exponential_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for lambd in [0.5, 1.0, 5.0]: - t = torch.empty( - self.size, dtype=dtype, device=device - ).exponential_(lambd=lambd, generator=gen) - res = stats.kstest( - to_numpy(t.cpu(), torch.double), - "expon", - args=( - 0, - 1 / lambd, - ), - ) - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - @unittest.skip("https://github.com/pytorch/pytorch/issues/38662") - def test_exponential_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - for lambd in [0.5, 1.0, 5.0]: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").exponential_( - 
lambd=lambd, generator=gen - ) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty( - self.size, dtype=dtype, device="cuda" - ).exponential_(lambd=lambd, generator=gen) - self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) - - def test_cauchy_kstest(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for median in [-10, 0, 50]: - for sigma in [0.5, 1.0, 10.0]: - t = torch.empty( - self.size, dtype=dtype, device=device - ).cauchy_(median=median, sigma=sigma, generator=gen) - res = stats.kstest( - to_numpy(t.cpu(), torch.double), - "cauchy", - args=(median, sigma), - ) - if dtype in [torch.half, torch.bfloat16]: - self.assertTrue(res.statistic < 0.4) - else: - self.assertTrue(res.statistic < 0.1) - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_cauchy_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - for median in [-10, 0, 50]: - for sigma in [0.5, 1.0, 10.0]: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").cauchy_( - median=median, sigma=sigma, generator=gen - ) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device="cuda").cauchy_( - median=median, sigma=sigma, generator=gen - ) - self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) - - def test_geometric(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.fp_dtypes: - for p in [0.2, 0.5, 0.8]: - t = torch.empty( - self.size, dtype=dtype, device=device - ).geometric_(p=p, generator=gen) - # actual = np.histogram(t.cpu().to(torch.double), np.arange(1, 100))[0] - # expected = stats.geom(p).pmf(np.arange(1, 99)) * self.size - # res = stats.chisquare(actual, expected) - # self.assertAlmostEqual(res.pvalue, 1.0, delta=0.5) TODO https://github.com/pytorch/csprng/issues/7 - - @unittest.skipIf(no_cuda, no_cuda_message) - def test_geometric_cpu_vs_cuda(self): - for dtype in self.fp_dtypes: - 
for p in [0.2, 0.5, 0.8]: - gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").geometric_( - p=p, generator=gen - ) - gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device="cuda").geometric_( - p=p, generator=gen - ) - self.assertTrue( - torch.allclose(cpu_t, cuda_t.cpu(), 1e-9, equal_nan=True) - ) - - def test_non_contiguous_vs_contiguous(self): - size = 10 - for device in self.all_devices: - for dtype in self.all_dtypes: - for i in range(10): - t = torch.zeros([size, size, size], dtype=dtype, device=device) - x1 = random.randrange(0, size) - y1 = random.randrange(0, size) - z1 = random.randrange(0, size) - x2 = random.randrange(x1 + 1, max(x1 + 2, size)) - y2 = random.randrange(y1 + 1, max(y1 + 2, size)) - z2 = random.randrange(z1 + 1, max(z1 + 2, size)) - maybe_non_contiguous = t[x1:x2, y1:y2, z1:z2] - assert maybe_non_contiguous.numel() > 0 - - if not maybe_non_contiguous.is_contiguous(): - seed = random.randrange(1000) - - non_contiguous = maybe_non_contiguous - gen = csprng.create_mt19937_generator(seed) - non_contiguous.random_(generator=gen) - - contiguous = torch.zeros_like(non_contiguous) - gen = csprng.create_mt19937_generator(seed) - contiguous.random_(generator=gen) - - assert contiguous.is_contiguous() - self.assertTrue((non_contiguous == contiguous).all()) - - for x in range(0, size): - for y in range(0, size): - for z in range(0, size): - if ( - not x1 <= x < x2 - and not y1 <= y < y2 - and not z1 <= z < z2 - ): - self.assertTrue(t[x, y, z] == 0) - - @unittest.skipIf(IS_SANDCASTLE or IS_FBCODE, "Does not work on Sandcastle") - @unittest.skipIf(torch.get_num_threads() < 2, "requires multithreading CPU") - def test_cpu_parallel(self): - urandom_gen = csprng.create_random_device_generator("/dev/urandom") - - def measure(size): - t = torch.empty(size, dtype=torch.float32, device="cpu") - start = time.time() - for i in range(20): - 
t.normal_(generator=urandom_gen) - finish = time.time() - return finish - start - - time_for_1K = measure(1000) - time_for_1M = measure(1000000) - # Pessimistic check that parallel execution gives >= 1.5 performance boost - self.assertTrue(time_for_1M / time_for_1K < 1000 / 1.5) - - @unittest.skipIf(IS_SANDCASTLE or IS_FBCODE, "Does not work on Sandcastle") - def test_version(self): - self.assertTrue(csprng.__version__) - self.assertTrue(csprng.git_version) - - def test_randperm(self): - for device in self.all_devices: - for gen in self.all_generators: - for dtype in self.int_dtypes: - for size in range(0, 20): - expected = torch.arange(size, dtype=dtype, device=device) - - actual = torch.randperm( - size, dtype=dtype, device=device, generator=gen - ) - - actual_out = torch.empty(1, dtype=dtype, device=device) - torch.randperm(size, out=actual_out, generator=gen) - - if size >= 10: - self.assertTrue(not torch.allclose(expected, actual)) - self.assertTrue(not torch.allclose(expected, actual_out)) - - actual = actual.sort()[0] - actual_out = actual.sort()[0] - - self.assertTrue(torch.allclose(expected, actual)) - self.assertTrue(torch.allclose(expected, actual_out)) - - def test_encrypt_decrypt(self): - key_size_bytes = 16 - block_size_bytes = 16 - - def sizeof(dtype): - if dtype == torch.bool: - return 1 - elif dtype.is_floating_point: - return torch.finfo(dtype).bits // 8 - else: - return torch.iinfo(dtype).bits // 8 - - def pad(data, pad_size): - if len(data) % pad_size == 0: - return data - length = pad_size - (len(data) % pad_size) - return data + bytes([0]) * length - - def create_aes(m, k): - if m == "ecb": - return AES.new(k.tobytes(), AES.MODE_ECB) - elif m == "ctr": - ctr = Counter.new( - AES.block_size * 8, initial_value=0, little_endian=True - ) - return AES.new(k.tobytes(), AES.MODE_CTR, counter=ctr) - else: - return None - - for key_dtype in self.all_dtypes: - key_size = key_size_bytes // sizeof(key_dtype) - key = torch.empty(key_size, 
dtype=key_dtype).random_() - key_np = to_bytes(key) - for initial_dtype in self.all_dtypes: - for initial_size in [0, 4, 8, 15, 16, 23, 42]: - initial = torch.empty(initial_size, dtype=initial_dtype).random_() - initial_np = to_bytes(initial) - initial_size_bytes = initial_size * sizeof(initial_dtype) - for encrypted_dtype in self.all_dtypes: - encrypted_size = ( - (initial_size_bytes + block_size_bytes - 1) - // block_size_bytes - * block_size_bytes - // sizeof(encrypted_dtype) - ) - encrypted = torch.zeros(encrypted_size, dtype=encrypted_dtype) - for decrypted_dtype in self.all_dtypes: - decrypted_size = ( - initial_size_bytes + sizeof(decrypted_dtype) - 1 - ) // sizeof(decrypted_dtype) - decrypted = torch.zeros( - decrypted_size, dtype=decrypted_dtype - ) - for mode in ["ecb", "ctr"]: - for device in self.all_devices: - key = key.to(device) - initial = initial.to(device) - encrypted = encrypted.to(device) - decrypted = decrypted.to(device) - - csprng.encrypt( - initial, encrypted, key, "aes128", mode - ) - encrypted_np = to_bytes(encrypted) - - aes = create_aes(mode, key_np) - - encrypted_expected = np.frombuffer( - aes.encrypt( - pad(initial_np.tobytes(), block_size_bytes) - ), - dtype=np.int8, - ) - self.assertTrue( - np.array_equal(encrypted_np, encrypted_expected) - ) - - csprng.decrypt( - encrypted, decrypted, key, "aes128", mode - ) - decrypted_np = to_bytes(decrypted)[ - :initial_size_bytes - ] - - aes = create_aes(mode, key_np) - - decrypted_expected = np.frombuffer( - aes.decrypt( - pad( - encrypted_np.tobytes(), block_size_bytes - ) - ), - dtype=np.int8, - )[:initial_size_bytes] - self.assertTrue( - np.array_equal(decrypted_np, decrypted_expected) - ) - - self.assertTrue( - np.array_equal(initial_np, decrypted_np) - ) - - def test_encrypt_decrypt_inplace(self): - key_size_bytes = 16 - - def sizeof(dtype): - if dtype == torch.bool: - return 1 - elif dtype.is_floating_point: - return torch.finfo(dtype).bits // 8 - else: - return torch.iinfo(dtype).bits 
// 8 - - def create_aes(m, k): - if m == "ecb": - return AES.new(k.tobytes(), AES.MODE_ECB) - elif m == "ctr": - ctr = Counter.new( - AES.block_size * 8, initial_value=0, little_endian=True - ) - return AES.new(k.tobytes(), AES.MODE_CTR, counter=ctr) - else: - return None - - for key_dtype in self.all_dtypes: - key_size = key_size_bytes // sizeof(key_dtype) - key = torch.empty(key_size, dtype=key_dtype).random_() - key_np = to_bytes(key) - for initial_dtype in self.all_dtypes: - for initial_size_bytes in [0, 16, 256]: - initial_size = initial_size_bytes // sizeof(initial_dtype) - initial = torch.empty(initial_size, dtype=initial_dtype).random_() - initial_np = to_bytes(initial) - initial_np_copy = np.copy(initial_np) - for mode in ["ecb", "ctr"]: - for device in self.all_devices: - key = key.to(device) - initial = initial.to(device) - - csprng.encrypt(initial, initial, key, "aes128", mode) - encrypted_np = to_bytes(initial) - aes = create_aes(mode, key_np) - encrypted_expected = np.frombuffer( - aes.encrypt(initial_np_copy.tobytes()), dtype=np.int8 - ) - self.assertTrue( - np.array_equal(encrypted_np, encrypted_expected) - ) - - encrypted_np_copy = np.copy(encrypted_np) - - csprng.decrypt(initial, initial, key, "aes128", mode) - decrypted_np = to_bytes(initial) - aes = create_aes(mode, key_np) - decrypted_expected = np.frombuffer( - aes.decrypt(encrypted_np_copy.tobytes()), dtype=np.int8 - ) - self.assertTrue( - np.array_equal(decrypted_np, decrypted_expected) - ) - - self.assertTrue( - np.array_equal(initial_np_copy, decrypted_np) - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/torchcsprng/__init__.py b/torchcsprng/__init__.py deleted file mode 100644 index a05c967..0000000 --- a/torchcsprng/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -import torch - -from torchcsprng._C import * - - -try: - from .version import __version__, git_version # noqa: F401 -except ImportError: - pass diff --git a/torchcsprng/__init__.pyi b/torchcsprng/__init__.pyi deleted file mode 100644 index 236dbfe..0000000 --- a/torchcsprng/__init__.pyi +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from torch import Generator, Tensor - -def supports_cuda() -> bool: ... -def create_random_device_generator(token: str = "") -> Generator: ... -def create_mt19937_generator(seed: int = 0): ... -def encrypt(input: Tensor, output: Tensor, key: Tensor, cipher, mode): ... -def decrypt(input: Tensor, output: Tensor, key: Tensor, cipher, mode): ... -def __version__() -> str: ... -def git_version() -> str: ... From 5bf7869f45acecf1c79442168bd9c260a43a8d08 Mon Sep 17 00:00:00 2001 From: myl7 Date: Mon, 11 Dec 2023 14:51:35 +0800 Subject: [PATCH 02/10] Clean torchcsprng c src Remove unused CPU impl. Format code. 
--- .clang-format | 9 + torchcsprng/{csrc => }/OffsetCalculator.cuh | 13 +- torchcsprng/{csrc => }/THCIntegerDivider.cuh | 27 +- torchcsprng/{csrc/aes.inc => aes.h} | 255 +++++----- torchcsprng/block_cipher.h | 178 +++++++ torchcsprng/csprng.cpp | 292 +++++++++++ torchcsprng/csrc/block_cipher.h | 201 -------- torchcsprng/csrc/cpu/kernels.cpp | 16 - torchcsprng/csrc/cpu/kernels.h | 19 - torchcsprng/csrc/csprng.cpp | 377 --------------- torchcsprng/csrc/cuda/kernels.cu | 16 - torchcsprng/csrc/cuda/kernels.cuh | 19 - torchcsprng/csrc/kernels_body.inc | 437 ----------------- torchcsprng/csrc/kernels_commons.h | 47 -- torchcsprng/csrc/kernels_decls.inc | 56 --- torchcsprng/kernels.cu | 484 +++++++++++++++++++ torchcsprng/kernels.cuh | 84 ++++ torchcsprng/kernels_commons.h | 78 +++ torchcsprng/{csrc => }/macros.h | 0 19 files changed, 1264 insertions(+), 1344 deletions(-) create mode 100644 .clang-format rename torchcsprng/{csrc => }/OffsetCalculator.cuh (91%) rename torchcsprng/{csrc => }/THCIntegerDivider.cuh (86%) rename torchcsprng/{csrc/aes.inc => aes.h} (54%) create mode 100644 torchcsprng/block_cipher.h create mode 100644 torchcsprng/csprng.cpp delete mode 100644 torchcsprng/csrc/block_cipher.h delete mode 100644 torchcsprng/csrc/cpu/kernels.cpp delete mode 100644 torchcsprng/csrc/cpu/kernels.h delete mode 100644 torchcsprng/csrc/csprng.cpp delete mode 100644 torchcsprng/csrc/cuda/kernels.cu delete mode 100644 torchcsprng/csrc/cuda/kernels.cuh delete mode 100644 torchcsprng/csrc/kernels_body.inc delete mode 100644 torchcsprng/csrc/kernels_commons.h delete mode 100644 torchcsprng/csrc/kernels_decls.inc create mode 100644 torchcsprng/kernels.cu create mode 100644 torchcsprng/kernels.cuh create mode 100644 torchcsprng/kernels_commons.h rename torchcsprng/{csrc => }/macros.h (100%) diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..0b97c10 --- /dev/null +++ b/.clang-format @@ -0,0 +1,9 @@ +BasedOnStyle: Google +DerivePointerAlignment: false 
+PointerAlignment: Right +ColumnLimit: 120 +AllowShortFunctionsOnASingleLine: Empty +AlignAfterOpenBracket: DontAlign +ContinuationIndentWidth: 2 +SortIncludes: Never +AlignEscapedNewlines: DontAlign diff --git a/torchcsprng/csrc/OffsetCalculator.cuh b/torchcsprng/OffsetCalculator.cuh similarity index 91% rename from torchcsprng/csrc/OffsetCalculator.cuh rename to torchcsprng/OffsetCalculator.cuh index c38e54a..15fcd0e 100644 --- a/torchcsprng/csrc/OffsetCalculator.cuh +++ b/torchcsprng/OffsetCalculator.cuh @@ -33,7 +33,9 @@ struct OffsetCalculator { // if element_sizes is nullptr, then the strides will be in bytes, otherwise // the strides will be in # of elements. - OffsetCalculator(int dims, const int64_t* sizes, const int64_t* const* strides, const int64_t* element_sizes=nullptr) : dims(dims) { + OffsetCalculator( + int dims, const int64_t *sizes, const int64_t *const *strides, const int64_t *element_sizes = nullptr) + : dims(dims) { TORCH_CHECK(dims <= MAX_DIMS, "tensor has too many (>", MAX_DIMS, ") dims"); for (int i = 0; i < MAX_DIMS; ++i) { if (i < dims) { @@ -43,7 +45,7 @@ struct OffsetCalculator { } for (int arg = 0; arg < NARGS; arg++) { int64_t element_size = (element_sizes == nullptr ? 1LL : element_sizes[arg]); - strides_[i][arg] = i < dims ? strides[arg][i] / element_size : 0; + strides_[i][arg] = i < dims ? 
strides[arg][i] / element_size : 0; } } } @@ -67,7 +69,6 @@ struct OffsetCalculator { for (int arg = 0; arg < NARGS; arg++) { offsets[arg] += divmod.mod * strides_[dim][arg]; } - } return offsets; } @@ -96,10 +97,10 @@ struct TrivialOffsetCalculator { } }; -template -static OffsetCalculator make_offset_calculator(const at::TensorIterator& iter) { +template +static OffsetCalculator make_offset_calculator(const at::TensorIterator &iter) { AT_ASSERT(N <= iter.ntensors()); - std::array strides; + std::array strides; for (int i = 0; i < N; i++) { strides[i] = iter.strides(i).data(); } diff --git a/torchcsprng/csrc/THCIntegerDivider.cuh b/torchcsprng/THCIntegerDivider.cuh similarity index 86% rename from torchcsprng/csrc/THCIntegerDivider.cuh rename to torchcsprng/THCIntegerDivider.cuh index cdf01af..b7dfb6a 100644 --- a/torchcsprng/csrc/THCIntegerDivider.cuh +++ b/torchcsprng/THCIntegerDivider.cuh @@ -62,18 +62,22 @@ template struct DivMod { Value div, mod; - C10_HOST_DEVICE DivMod(Value div, Value mod) : div(div), mod(mod) { } + C10_HOST_DEVICE DivMod(Value div, Value mod) : div(div), mod(mod) {} }; // Base case: we only have an implementation for uint32_t for now. For // everything else, we use plain division. template struct IntDivider { - IntDivider() { } // Dummy constructor for arrays. - IntDivider(Value d) : divisor(d) { } + IntDivider() {} // Dummy constructor for arrays. 
+ IntDivider(Value d) : divisor(d) {} - C10_HOST_DEVICE inline Value div(Value n) const { return n / divisor; } - C10_HOST_DEVICE inline Value mod(Value n) const { return n % divisor; } + C10_HOST_DEVICE inline Value div(Value n) const { + return n / divisor; + } + C10_HOST_DEVICE inline Value mod(Value n) const { + return n % divisor; + } C10_HOST_DEVICE inline DivMod divmod(Value n) const { return DivMod(n / divisor, n % divisor); } @@ -86,13 +90,14 @@ template <> struct IntDivider { static_assert(sizeof(unsigned int) == 4, "Assumes 32-bit unsigned int."); - IntDivider() { } // Dummy constructor for arrays. + IntDivider() {} // Dummy constructor for arrays. IntDivider(unsigned int d) : divisor(d) { assert(divisor >= 1 && divisor <= INT32_MAX); // TODO: gcc/clang has __builtin_clz() but it's not portable. - for (shift = 0; shift < 32; shift++) if ((1U << shift) >= divisor) break; + for (shift = 0; shift < 32; shift++) + if ((1U << shift) >= divisor) break; uint64_t one = 1; uint64_t magic = ((one << 32) * ((one << shift) - divisor)) / divisor + 1; @@ -108,7 +113,7 @@ struct IntDivider { return (t + n) >> shift; #else // Using uint64_t so that the addition does not overflow. - uint64_t t = ((uint64_t) n * m1) >> 32; + uint64_t t = ((uint64_t)n * m1) >> 32; return (t + n) >> shift; #endif } @@ -123,8 +128,8 @@ struct IntDivider { } unsigned int divisor; // d above. - unsigned int m1; // Magic number: m' above. - unsigned int shift; // Shift amounts. + unsigned int m1; // Magic number: m' above. + unsigned int shift; // Shift amounts. 
}; -#endif // THC_INTEGER_DIVIDER_INC +#endif // THC_INTEGER_DIVIDER_INC diff --git a/torchcsprng/csrc/aes.inc b/torchcsprng/aes.h similarity index 54% rename from torchcsprng/csrc/aes.inc rename to torchcsprng/aes.h index db29996..32041f3 100644 --- a/torchcsprng/csrc/aes.inc +++ b/torchcsprng/aes.h @@ -10,7 +10,7 @@ namespace aes { // This AES implementation is based on // https://github.com/kokke/tiny-AES-c/blob/master/aes.c // authored by kokke and et al. and distributed under public domain license. -// +// // This is free and unencumbered software released into the public domain. // // Anyone is free to copy, modify, publish, use, compile, sell, or @@ -45,14 +45,14 @@ namespace aes { #define Nb 4 #if defined(AES256) && (AES256 == 1) - #define Nk 8 - #define Nr 14 +#define Nk 8 +#define Nr 14 #elif defined(AES192) && (AES192 == 1) - #define Nk 6 - #define Nr 12 +#define Nk 6 +#define Nr 12 #else - #define Nk 4 // The number of 32 bit words in a key. - #define Nr 10 // The number of rounds in AES Cipher. +#define Nk 4 // The number of 32 bit words in a key. +#define Nr 10 // The number of rounds in AES Cipher. #endif constexpr size_t block_t_size = 16; @@ -60,62 +60,55 @@ constexpr size_t block_t_size = 16; typedef uint8_t state_t[4][4]; // The lookup-tables are marked const so they can be placed in read-only storage instead of RAM -// The numbers below can be computed dynamically trading ROM for RAM - +// The numbers below can be computed dynamically trading ROM for RAM - // This can be useful in (embedded) bootloader applications, where ROM is often limited. 
TORCH_CSPRNG_CONSTANT const uint8_t sbox[256] = { - //0 1 2 3 4 5 6 7 8 9 A B C D E F - 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, - 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, - 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, - 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, - 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, - 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, - 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, - 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, - 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, - 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, - 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, - 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, - 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, - 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, - 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, - 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }; - -TORCH_CSPRNG_CONSTANT const uint8_t rsbox[256] = { - 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 
0x4e, - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, - 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d }; - -// The round constant word array, Rcon[i], contains the values given by + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, + 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, + 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, + 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, + 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, + 0xcf, 0xd0, 0xef, 0xaa, 
0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, + 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, 0x5f, + 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, + 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, + 0xae, 0x08, 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, + 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, + 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, + 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}; + +TORCH_CSPRNG_CONSTANT const uint8_t rsbox[256] = {0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, + 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, + 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, 0x08, + 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, + 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, + 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, + 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, + 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 
0x1a, + 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, + 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, + 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, + 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, + 0x61, 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d}; + +// The round constant word array, Rcon[i], contains the values given by // x to the power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8) -TORCH_CSPRNG_CONSTANT const uint8_t Rcon[11] = { - 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 }; +TORCH_CSPRNG_CONSTANT const uint8_t Rcon[11] = {0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36}; #define getSBoxValue(num) (sbox[(num)]) #define getSBoxInvert(num) (rsbox[(num)]) -// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. -TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key){ +// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. +TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t *RoundKey, const uint8_t *Key) { unsigned int i, j, k; - uint8_t tempa[4]; // Used for the column/row operations - + uint8_t tempa[4]; // Used for the column/row operations + // The first round key is the key itself. - for (i = 0; i < Nk; ++i) - { + for (i = 0; i < Nk; ++i) { RoundKey[(i * 4) + 0] = Key[(i * 4) + 0]; RoundKey[(i * 4) + 1] = Key[(i * 4) + 1]; RoundKey[(i * 4) + 2] = Key[(i * 4) + 2]; @@ -123,19 +116,16 @@ TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key } // All other round keys are found from the previous round keys. 
- for (i = Nk; i < Nb * (Nr + 1); ++i) - { + for (i = Nk; i < Nb * (Nr + 1); ++i) { { k = (i - 1) * 4; - tempa[0]=RoundKey[k + 0]; - tempa[1]=RoundKey[k + 1]; - tempa[2]=RoundKey[k + 2]; - tempa[3]=RoundKey[k + 3]; - + tempa[0] = RoundKey[k + 0]; + tempa[1] = RoundKey[k + 1]; + tempa[2] = RoundKey[k + 2]; + tempa[3] = RoundKey[k + 3]; } - if (i % Nk == 0) - { + if (i % Nk == 0) { // This function shifts the 4 bytes in a word to the left once. // [a0,a1,a2,a3] becomes [a1,a2,a3,a0] @@ -148,7 +138,7 @@ TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key tempa[3] = u8tmp; } - // SubWord() is a function that takes a four-byte input word and + // SubWord() is a function that takes a four-byte input word and // applies the S-box to each of the four bytes to produce an output word. // Function Subword() @@ -159,11 +149,10 @@ TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key tempa[3] = getSBoxValue(tempa[3]); } - tempa[0] = tempa[0] ^ Rcon[i/Nk]; + tempa[0] = tempa[0] ^ Rcon[i / Nk]; } #if defined(AES256) && (AES256 == 1) - if (i % Nk == 4) - { + if (i % Nk == 4) { // Function Subword() { tempa[0] = getSBoxValue(tempa[0]); @@ -173,7 +162,8 @@ TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key } } #endif - j = i * 4; k=(i - Nk) * 4; + j = i * 4; + k = (i - Nk) * 4; RoundKey[j + 0] = RoundKey[k + 0] ^ tempa[0]; RoundKey[j + 1] = RoundKey[k + 1] ^ tempa[1]; RoundKey[j + 2] = RoundKey[k + 2] ^ tempa[2]; @@ -183,13 +173,10 @@ TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key // This function adds the round key to state. // The round key is added to the state by an XOR function. 
-TORCH_CSPRNG_HOST_DEVICE void AddRoundKey(uint8_t round, state_t* state, const uint8_t* RoundKey) -{ - uint8_t i,j; - for (i = 0; i < 4; ++i) - { - for (j = 0; j < 4; ++j) - { +TORCH_CSPRNG_HOST_DEVICE void AddRoundKey(uint8_t round, state_t *state, const uint8_t *RoundKey) { + uint8_t i, j; + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { (*state)[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j]; } } @@ -197,13 +184,10 @@ TORCH_CSPRNG_HOST_DEVICE void AddRoundKey(uint8_t round, state_t* state, const u // The SubBytes Function Substitutes the values in the // state matrix with values in an S-box. -TORCH_CSPRNG_HOST_DEVICE void SubBytes(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void SubBytes(state_t *state) { uint8_t i, j; - for (i = 0; i < 4; ++i) - { - for (j = 0; j < 4; ++j) - { + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { (*state)[j][i] = getSBoxValue((*state)[j][i]); } } @@ -212,73 +196,72 @@ TORCH_CSPRNG_HOST_DEVICE void SubBytes(state_t* state) // The ShiftRows() function shifts the rows in the state to the left. // Each row is shifted with different offset. // Offset = Row number. So the first row is not shifted. 
-TORCH_CSPRNG_HOST_DEVICE void ShiftRows(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void ShiftRows(state_t *state) { uint8_t temp; - // Rotate first row 1 columns to left - temp = (*state)[0][1]; + // Rotate first row 1 columns to left + temp = (*state)[0][1]; (*state)[0][1] = (*state)[1][1]; (*state)[1][1] = (*state)[2][1]; (*state)[2][1] = (*state)[3][1]; (*state)[3][1] = temp; - // Rotate second row 2 columns to left - temp = (*state)[0][2]; + // Rotate second row 2 columns to left + temp = (*state)[0][2]; (*state)[0][2] = (*state)[2][2]; (*state)[2][2] = temp; - temp = (*state)[1][2]; + temp = (*state)[1][2]; (*state)[1][2] = (*state)[3][2]; (*state)[3][2] = temp; // Rotate third row 3 columns to left - temp = (*state)[0][3]; + temp = (*state)[0][3]; (*state)[0][3] = (*state)[3][3]; (*state)[3][3] = (*state)[2][3]; (*state)[2][3] = (*state)[1][3]; (*state)[1][3] = temp; } -TORCH_CSPRNG_HOST_DEVICE uint8_t xtime(uint8_t x) -{ - return ((x<<1) ^ (((x>>7) & 1) * 0x1b)); +TORCH_CSPRNG_HOST_DEVICE uint8_t xtime(uint8_t x) { + return ((x << 1) ^ (((x >> 7) & 1) * 0x1b)); } // MixColumns function mixes the columns of the state matrix -TORCH_CSPRNG_HOST_DEVICE void MixColumns(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void MixColumns(state_t *state) { uint8_t i; uint8_t Tmp, Tm, t; - for (i = 0; i < 4; ++i) - { - t = (*state)[i][0]; - Tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3] ; - Tm = (*state)[i][0] ^ (*state)[i][1] ; Tm = xtime(Tm); (*state)[i][0] ^= Tm ^ Tmp ; - Tm = (*state)[i][1] ^ (*state)[i][2] ; Tm = xtime(Tm); (*state)[i][1] ^= Tm ^ Tmp ; - Tm = (*state)[i][2] ^ (*state)[i][3] ; Tm = xtime(Tm); (*state)[i][2] ^= Tm ^ Tmp ; - Tm = (*state)[i][3] ^ t ; Tm = xtime(Tm); (*state)[i][3] ^= Tm ^ Tmp ; + for (i = 0; i < 4; ++i) { + t = (*state)[i][0]; + Tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3]; + Tm = (*state)[i][0] ^ (*state)[i][1]; + Tm = xtime(Tm); + (*state)[i][0] ^= Tm ^ Tmp; + Tm = (*state)[i][1] ^ 
(*state)[i][2]; + Tm = xtime(Tm); + (*state)[i][1] ^= Tm ^ Tmp; + Tm = (*state)[i][2] ^ (*state)[i][3]; + Tm = xtime(Tm); + (*state)[i][2] ^= Tm ^ Tmp; + Tm = (*state)[i][3] ^ t; + Tm = xtime(Tm); + (*state)[i][3] ^= Tm ^ Tmp; } } -TORCH_CSPRNG_HOST_DEVICE uint8_t Multiply(uint8_t x, uint8_t y) -{ - return (((y & 1) * x) ^ - ((y>>1 & 1) * xtime(x)) ^ - ((y>>2 & 1) * xtime(xtime(x))) ^ - ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^ - ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))); /* this last call to xtime() can be omitted */ +TORCH_CSPRNG_HOST_DEVICE uint8_t Multiply(uint8_t x, uint8_t y) { + return (((y & 1) * x) ^ ((y >> 1 & 1) * xtime(x)) ^ ((y >> 2 & 1) * xtime(xtime(x))) ^ + ((y >> 3 & 1) * xtime(xtime(xtime(x)))) ^ + ((y >> 4 & 1) * xtime(xtime(xtime(xtime(x)))))); /* this last call to xtime() can be omitted */ } // MixColumns function mixes the columns of the state matrix. // The method used to multiply may be difficult to understand for the inexperienced. // Please use the references to gain more information. -TORCH_CSPRNG_HOST_DEVICE void InvMixColumns(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void InvMixColumns(state_t *state) { int i; uint8_t a, b, c, d; - for (i = 0; i < 4; ++i) - { + for (i = 0; i < 4; ++i) { a = (*state)[i][0]; b = (*state)[i][1]; c = (*state)[i][2]; @@ -293,20 +276,16 @@ TORCH_CSPRNG_HOST_DEVICE void InvMixColumns(state_t* state) // The SubBytes Function Substitutes the values in the // state matrix with values in an S-box. 
-TORCH_CSPRNG_HOST_DEVICE void InvSubBytes(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void InvSubBytes(state_t *state) { uint8_t i, j; - for (i = 0; i < 4; ++i) - { - for (j = 0; j < 4; ++j) - { + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { (*state)[j][i] = getSBoxInvert((*state)[j][i]); } } } -TORCH_CSPRNG_HOST_DEVICE void InvShiftRows(state_t* state) -{ +TORCH_CSPRNG_HOST_DEVICE void InvShiftRows(state_t *state) { uint8_t temp; // Rotate first row 1 columns to right @@ -333,56 +312,54 @@ TORCH_CSPRNG_HOST_DEVICE void InvShiftRows(state_t* state) (*state)[3][3] = temp; } -TORCH_CSPRNG_HOST_DEVICE void encrypt(uint8_t* state, const uint8_t* key) { +TORCH_CSPRNG_HOST_DEVICE void encrypt(uint8_t *state, const uint8_t *key) { uint8_t RoundKey[176]; - KeyExpansion(RoundKey, key); + KeyExpansion(RoundKey, key); uint8_t round = 0; // Add the First round key to the state before starting the rounds. - AddRoundKey(0, (state_t*)state, RoundKey); + AddRoundKey(0, (state_t *)state, RoundKey); // There will be Nr rounds. // The first Nr-1 rounds are identical. // These Nr rounds are executed in the loop below. // Last one without MixColumns() - for (round = 1; ; ++round) - { - SubBytes((state_t*)state); - ShiftRows((state_t*)state); + for (round = 1;; ++round) { + SubBytes((state_t *)state); + ShiftRows((state_t *)state); if (round == Nr) { break; } - MixColumns((state_t*)state); - AddRoundKey(round, (state_t*)state, RoundKey); + MixColumns((state_t *)state); + AddRoundKey(round, (state_t *)state, RoundKey); } // Add round key to last round - AddRoundKey(Nr, (state_t*)state, RoundKey); + AddRoundKey(Nr, (state_t *)state, RoundKey); } -TORCH_CSPRNG_HOST_DEVICE void decrypt(uint8_t* state, const uint8_t* key) { +TORCH_CSPRNG_HOST_DEVICE void decrypt(uint8_t *state, const uint8_t *key) { uint8_t RoundKey[176]; KeyExpansion(RoundKey, key); uint8_t round = 0; // Add the First round key to the state before starting the rounds. 
- AddRoundKey(Nr, (state_t*)state, RoundKey); + AddRoundKey(Nr, (state_t *)state, RoundKey); // There will be Nr rounds. // The first Nr-1 rounds are identical. // These Nr rounds are executed in the loop below. // Last one without InvMixColumn() - for (round = (Nr - 1); ; --round) - { - InvShiftRows((state_t*)state); - InvSubBytes((state_t*)state); - AddRoundKey(round, (state_t*)state, RoundKey); + for (round = (Nr - 1);; --round) { + InvShiftRows((state_t *)state); + InvSubBytes((state_t *)state); + AddRoundKey(round, (state_t *)state, RoundKey); if (round == 0) { break; } - InvMixColumns((state_t*)state); + InvMixColumns((state_t *)state); } } -} +} // namespace aes diff --git a/torchcsprng/block_cipher.h b/torchcsprng/block_cipher.h new file mode 100644 index 0000000..aeae133 --- /dev/null +++ b/torchcsprng/block_cipher.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include "macros.h" +#include +#include +#include "OffsetCalculator.cuh" +#include +#include +#include + +#if defined(__CUDACC__) || defined(__HIPCC__) +#include +#include +#endif + +#if defined(__CUDACC__) || defined(__HIPCC__) +#define UNROLL_IF_CUDA #pragma unroll +#else +#define UNROLL_IF_CUDA +#endif + +namespace torch { +namespace csprng { + +template +TORCH_CSPRNG_HOST_DEVICE static void copy_input_to_block(int64_t idx, uint8_t *block, int block_size, void *input_ptr, + int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc) { + for (auto i = 0; i < block_size / input_type_size; ++i) { + const auto linear_index = idx * (block_size / input_type_size) + i; + if (linear_index < input_numel) { + std::memcpy(block + i * input_type_size, + &(reinterpret_cast(input_ptr)[input_index_calc(linear_index)]), input_type_size); + } + } +} + +template +TORCH_CSPRNG_HOST_DEVICE static void copy_block_to_output(int64_t idx, uint8_t *block, int output_elem_per_block, + void *output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc) { + for (auto i = 0; i < output_elem_per_block; ++i) { + const auto linear_index = idx * output_elem_per_block + i; + if (linear_index < output_numel) { + std::memcpy(&(reinterpret_cast(output_ptr)[output_index_calc(linear_index)]), + block + i * output_type_size, output_type_size); + } + } +} + +template +TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper(int64_t idx, cipher_t cipher, int output_elem_per_block, + void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, + int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { + uint8_t block[block_size]; + std::memset(&block, 0, block_size); // is it ok to use zeros as padding? 
+ if (input_ptr != nullptr) { + copy_input_to_block(idx, block, block_size, input_ptr, input_numel, input_type_size, input_index_calc); + } + cipher(idx, block); + transform(block); + copy_block_to_output( + idx, block, output_elem_per_block, output_ptr, output_numel, output_type_size, output_index_calc); +} + +#if defined(__CUDACC__) || defined(__HIPCC__) +template +__global__ static void block_cipher_kernel_cuda(cipher_t cipher, int output_elem_per_block, void *input_ptr, + int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, int64_t output_numel, + int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { + const auto idx = blockIdx.x * blockDim.x + threadIdx.x; + block_cipher_kernel_helper(idx, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, + input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform); +} +#endif + +template +static void block_cipher_kernel_cpu_serial(int64_t begin, int64_t end, cipher_t cipher, int output_elem_per_block, + void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, + int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { + for (auto idx = begin; idx < end; ++idx) { + block_cipher_kernel_helper(idx, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, + input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform); + } +} + +template +static void block_cipher_kernel_cpu(int64_t total, cipher_t cipher, int output_elem_per_block, void *input_ptr, + int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, int64_t output_numel, + int output_type_size, output_index_calc_t output_index_calc, transform_t transform_func) { + if (total < at::internal::GRAIN_SIZE || at::get_num_threads() == 1) { + 
block_cipher_kernel_cpu_serial(0, total, cipher, output_elem_per_block, input_ptr, input_numel, + input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform_func); + } else { + at::parallel_for(0, total, at::internal::GRAIN_SIZE, [&](int64_t begin, int64_t end) { + block_cipher_kernel_cpu_serial(begin, end, cipher, output_elem_per_block, input_ptr, input_numel, + input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, + transform_func); + }); + } +} + +template +void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, + void *output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, + at::Device device, cipher_t cipher, int output_elem_per_block, transform_t transform_func) { + if (output_ptr == nullptr || output_numel == 0) { + return; + } + + if (device.type() == at::kCPU) { + const auto total = (output_numel + output_elem_per_block - 1) / output_elem_per_block; + block_cipher_kernel_cpu(total, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, + input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform_func); + } else if (device.type() == at::kCUDA) { +#if defined(__CUDACC__) || defined(__HIPCC__) + const auto threads = 256; + const auto grid = (output_numel + (threads * output_elem_per_block) - 1) / (threads * output_elem_per_block); + auto stream = at::cuda::getCurrentCUDAStream(); + block_cipher_kernel_cuda<<>>(cipher, output_elem_per_block, input_ptr, + input_numel, input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, + transform_func); + AT_CUDA_CHECK(cudaGetLastError()); +#else + TORCH_CHECK(false, "torchcsprng was compiled without CUDA support"); +#endif + } else { + TORCH_CHECK(false, "block_cipher supports only CPU and CUDA devices"); + } +} + +template +void 
block_cipher(at::Tensor input, at::Tensor output, cipher_t cipher) { + const auto input_ptr = input.data_ptr(); + const auto input_numel = input.numel(); + + // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero + if (input_ptr == nullptr || input_numel == 0) { + return; + } + + const auto input_type_size = input.element_size(); + const auto input_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(input)); + const auto input_index_calc = [input_offset_calc] TORCH_CSPRNG_HOST_DEVICE( + uint32_t li) -> uint32_t { return input_offset_calc.get(li)[0]; }; + + const auto output_ptr = output.data_ptr(); + const auto output_numel = output.numel(); + + // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero + if (output_ptr == nullptr || output_numel == 0) { + return; + } + + const auto output_type_size = output.element_size(); + const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); + const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE( + uint32_t li) -> uint32_t { return output_offset_calc.get(li)[0]; }; + + const auto device = output.device(); + + torch::csprng::block_cipher(input_ptr, input_numel, input_type_size, input_index_calc, output_ptr, + output_numel, output_type_size, output_index_calc, device, cipher, block_size / output_type_size, + [] TORCH_CSPRNG_HOST_DEVICE(uint8_t * x) {}); +} + +} // namespace csprng +} // namespace torch diff --git a/torchcsprng/csprng.cpp b/torchcsprng/csprng.cpp new file mode 100644 index 0000000..26c527d --- /dev/null +++ b/torchcsprng/csprng.cpp @@ -0,0 +1,292 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include + +#include +#include +#include + +#include "kernels_commons.h" +#include "kernels.cuh" + +using namespace at; +using namespace torch::csprng; + +static const auto GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE = "generator does not support tensor device type"; +static const auto TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED = "tensor device type is not supported"; + +// ==================================================== Random ======================================================== + +Tensor &random_(Tensor &self, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::random_(self, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor &random_from_to(Tensor &self, int64_t from, optional to, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::random_from_to(self, from, to, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor &random_to(Tensor &self, int64_t to, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::random_to(self, to, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ==================================================== Uniform ======================================================= + +Tensor &uniform_(Tensor &self, double from, double to, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::uniform_(self, from, to, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ==================================================== Normal ======================================================== + +Tensor &normal_(Tensor &self, double mean, double std, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_(self, 
mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor &normal_Tensor_float_out(const Tensor &mean, double std, c10::optional gen, Tensor &output) { + if (output.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_float_out(output, mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor &normal_float_Tensor_out(double mean, const Tensor &std, c10::optional gen, Tensor &output) { + if (output.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_float_Tensor_out(output, mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor &normal_Tensor_Tensor_out(const Tensor &mean, const Tensor &std, c10::optional gen, Tensor &output) { + if (output.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_Tensor_out(output, mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor normal_Tensor_float(const Tensor &mean, double std, c10::optional gen) { + if (mean.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_float(mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor normal_float_Tensor(double mean, const Tensor &std, c10::optional gen) { + if (std.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_float_Tensor(mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor normal_Tensor_Tensor(const Tensor &mean, const Tensor &std, c10::optional gen) { + if (mean.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_Tensor(mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// 
==================================================== Cauchy ======================================================== + +Tensor &cauchy_(Tensor &self, double median, double sigma, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::cauchy_(self, median, sigma, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ================================================== LogNormal ======================================================= + +Tensor &log_normal_(Tensor &self, double mean, double std, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::log_normal_(self, mean, std, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ================================================== Geometric ======================================================= + +Tensor &geometric_(Tensor &self, double p, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::geometric_(self, p, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ================================================== Exponential ===================================================== + +Tensor &exponential_(Tensor &self, double lambda, c10::optional gen) { + if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::exponential_(self, lambda, gen); + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// =============================================== Random permutation ================================================= + +// randperm implementation was copied from PyTorch to unblock CSPRNG users, but ultimately CSPRNG must reuse +// refactored randperm from PyTorch, see https://github.com/pytorch/pytorch/issues/43816 + +namespace { + +inline void check_supported_max_int_with_precision(int64_t n, 
const Tensor &tensor) { + TORCH_CHECK(at::scalar_tensor(n, tensor.options()).defined(), "n is too large for result tensor type: '", + tensor.toString(), "'"); + + // Ensure sufficient precision for floating point representation. + switch (tensor.scalar_type()) { + case at::ScalarType::Half: + TORCH_CHECK(n <= (int64_t(1) << 11) + 1, "n cannot be greater than 2049 for Half type."); + break; + case at::ScalarType::Float: + TORCH_CHECK(n <= (int64_t(1) << 24) + 1, "n cannot be greater than 2^24+1 for Float type."); + break; + case at::ScalarType::Double: // Unlikely to happen, but doesn't hurt to check + TORCH_CHECK(n <= (int64_t(1) << 53) + 1, "n cannot be greater than 2^53+1 for Double type."); + break; + default: + break; + } +} + +template +void randperm(Tensor &result, int64_t n, c10::optional generator) { + auto gen = at::check_generator(generator); + scalar_t *r__data = result.data_ptr(); + + result.resize_({n}); + int64_t r__stride_0 = result.stride(0); + + at::parallel_for(0, n, internal::GRAIN_SIZE, [&r__data, &r__stride_0](int64_t p_begin, int64_t p_end) { + for (int64_t i = p_begin; i < p_end; i++) r__data[i * r__stride_0] = static_cast(i); + }); + + for (int64_t i = 0; i < n - 1; i++) { + int64_t z = gen->random() % (n - i); + scalar_t sav = r__data[i * r__stride_0]; + r__data[i * r__stride_0] = r__data[(z + i) * r__stride_0]; + r__data[(z + i) * r__stride_0] = sav; + } +} +} // namespace + +Tensor &randperm_generator_out(int64_t n, c10::optional generator, Tensor &result) { + TORCH_CHECK(n >= 0, "n must be non-negative, got", n); + check_supported_max_int_with_precision(n, result); + if (result.device().type() == at::kCUDA) { + auto result_cpu = at::empty({n}, result.options().device(kCPU)); + randperm_generator_out(n, generator, result_cpu); + result.resize_({n}); + return result.copy_(result_cpu); + } + result.resize_({n}); + // See Note [Acquire lock when using random generators] + std::lock_guard lock(generator->mutex()); + 
AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, result.scalar_type(), "randperm", + [&]() -> void { randperm(result, n, generator); }); + return result; +} + +// ================================================Encrypt/Decrypt===================================================== + +Tensor encrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { + if (input.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::encrypt(input, output, key, cipher, mode); + } else { + TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); + } +} + +Tensor decrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { + if (input.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::decrypt(input, output, key, cipher, mode); + } else { + TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); + } +} + +// ==================================================================================================================== + +Generator create_random_device_generator(c10::optional token = c10::nullopt) { + if (token.has_value()) { + return make_generator(*token); + } else { + return make_generator(true); + } +} + +Generator create_mt19937_generator(c10::optional seed = c10::nullopt) { + if (seed.has_value()) { + return make_generator(*seed); + } else { + return make_generator(false); + } +} + +bool supports_cuda() { +#ifdef WITH_CUDA + return true; +#else + return false; +#endif +} + +TORCH_LIBRARY_IMPL(aten, CustomRNGKeyId, m) { + // Random + m.impl("random_.from", random_from_to); + m.impl("random_.to", random_to); + m.impl("random_", random_); + // Uniform + m.impl("uniform_", uniform_); + // Normal + m.impl("normal_", normal_); + m.impl("normal.Tensor_float_out", normal_Tensor_float_out); + m.impl("normal.float_Tensor_out", normal_float_Tensor_out); + m.impl("normal.Tensor_Tensor_out", normal_Tensor_Tensor_out); + m.impl("normal.Tensor_float", 
normal_Tensor_float); + m.impl("normal.float_Tensor", normal_float_Tensor); + m.impl("normal.Tensor_Tensor", normal_Tensor_Tensor); + // Cauchy + m.impl("cauchy_", cauchy_); + // LogNormal + m.impl("log_normal_", log_normal_); + // Geometric + m.impl("geometric_", geometric_); + // Exponential + m.impl("exponential_", exponential_); + // Random permutation + m.impl("randperm.generator_out", randperm_generator_out); +} diff --git a/torchcsprng/csrc/block_cipher.h b/torchcsprng/csrc/block_cipher.h deleted file mode 100644 index 5f1e2cf..0000000 --- a/torchcsprng/csrc/block_cipher.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include "macros.h" -#include -#include -#include "OffsetCalculator.cuh" -#include -#include -#include - -#if defined(__CUDACC__) || defined(__HIPCC__) -#include -#include -#endif - -#if defined(__CUDACC__) || defined(__HIPCC__) -#define UNROLL_IF_CUDA #pragma unroll -#else -#define UNROLL_IF_CUDA -#endif - -namespace torch { -namespace csprng { - -template -TORCH_CSPRNG_HOST_DEVICE static void copy_input_to_block(int64_t idx, uint8_t* block, int block_size, - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc) { - for (auto i = 0; i < block_size / input_type_size; ++i) { - const auto linear_index = idx * (block_size / input_type_size) + i; - if (linear_index < input_numel) { - std::memcpy( - block + i * input_type_size, - &(reinterpret_cast(input_ptr)[input_index_calc(linear_index)]), - input_type_size - ); - } - } -} - -template -TORCH_CSPRNG_HOST_DEVICE static void copy_block_to_output(int64_t idx, uint8_t* block, int output_elem_per_block, - void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc) { - for (auto i = 0; i < 
output_elem_per_block; ++i) { - const auto linear_index = idx * output_elem_per_block + i; - if (linear_index < output_numel) { - std::memcpy( - &(reinterpret_cast(output_ptr)[output_index_calc(linear_index)]), - block + i * output_type_size, - output_type_size - ); - } - } -} - -template -TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper( - int64_t idx, cipher_t cipher, int output_elem_per_block, - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, - transform_t transform) { - uint8_t block[block_size]; - std::memset(&block, 0, block_size); // is it ok to use zeros as padding? - if (input_ptr != nullptr) { - copy_input_to_block(idx, block, block_size, input_ptr, input_numel, input_type_size, input_index_calc); - } - cipher(idx, block); - transform(block); - copy_block_to_output(idx, block, output_elem_per_block, output_ptr, output_numel, output_type_size, output_index_calc); -} - -#if defined(__CUDACC__) || defined(__HIPCC__) -template -__global__ static void block_cipher_kernel_cuda(cipher_t cipher, int output_elem_per_block, - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, - transform_t transform) { - const auto idx = blockIdx.x * blockDim.x + threadIdx.x; - block_cipher_kernel_helper(idx, cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - transform); -} -#endif - -template -static void block_cipher_kernel_cpu_serial(int64_t begin, int64_t end, cipher_t cipher, int output_elem_per_block, - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void* output_ptr, int64_t output_numel, int output_type_size, 
output_index_calc_t output_index_calc, - transform_t transform) { - for (auto idx = begin; idx < end; ++idx) { - block_cipher_kernel_helper(idx, cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - transform); - } -} - -template -static void block_cipher_kernel_cpu(int64_t total, cipher_t cipher, int output_elem_per_block, - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, - transform_t transform_func) { - if (total < at::internal::GRAIN_SIZE || at::get_num_threads() == 1) { - block_cipher_kernel_cpu_serial(0, total, cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - transform_func); - } else { - at::parallel_for(0, total, at::internal::GRAIN_SIZE, [&](int64_t begin, int64_t end) { - block_cipher_kernel_cpu_serial(begin, end, cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - transform_func); - }); - } -} - -template -void block_cipher( - void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, - at::Device device, cipher_t cipher, int output_elem_per_block, transform_t transform_func) { - if (output_ptr == nullptr || output_numel == 0) { - return; - } - - if (device.type() == at::kCPU) { - const auto total = (output_numel + output_elem_per_block - 1) / output_elem_per_block; - block_cipher_kernel_cpu(total, - cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, 
output_index_calc, - transform_func - ); - } else if (device.type() == at::kCUDA) { -#if defined(__CUDACC__) || defined(__HIPCC__) - const auto threads = 256; - const auto grid = (output_numel + (threads * output_elem_per_block) - 1) / (threads * output_elem_per_block); - auto stream = at::cuda::getCurrentCUDAStream(); - block_cipher_kernel_cuda<<>>( - cipher, output_elem_per_block, - input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - transform_func - ); - AT_CUDA_CHECK(cudaGetLastError()); -#else - TORCH_CHECK(false, "torchcsprng was compiled without CUDA support"); -#endif - } else { - TORCH_CHECK(false, "block_cipher supports only CPU and CUDA devices"); - } -} - -template -void block_cipher(at::Tensor input, at::Tensor output, cipher_t cipher) { - const auto input_ptr = input.data_ptr(); - const auto input_numel = input.numel(); - - // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero - if (input_ptr == nullptr || input_numel == 0) { - return; - } - - const auto input_type_size = input.element_size(); - const auto input_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(input)); - const auto input_index_calc = [input_offset_calc] TORCH_CSPRNG_HOST_DEVICE (uint32_t li) -> uint32_t { - return input_offset_calc.get(li)[0]; - }; - - const auto output_ptr = output.data_ptr(); - const auto output_numel = output.numel(); - - // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero - if (output_ptr == nullptr || output_numel == 0) { - return; - } - - const auto output_type_size = output.element_size(); - const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); - const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE (uint32_t li) -> uint32_t { - return output_offset_calc.get(li)[0]; - }; - - const auto device = output.device(); - - torch::csprng::block_cipher( - 
input_ptr, input_numel, input_type_size, input_index_calc, - output_ptr, output_numel, output_type_size, output_index_calc, - device, cipher, block_size / output_type_size, - [] TORCH_CSPRNG_HOST_DEVICE (uint8_t* x) {}); -} - -}} diff --git a/torchcsprng/csrc/cpu/kernels.cpp b/torchcsprng/csrc/cpu/kernels.cpp deleted file mode 100644 index 395810a..0000000 --- a/torchcsprng/csrc/cpu/kernels.cpp +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../kernels_commons.h" - -namespace torch { -namespace csprng { -namespace cpu { - -#include "../kernels_body.inc" - -}}} diff --git a/torchcsprng/csrc/cpu/kernels.h b/torchcsprng/csrc/cpu/kernels.h deleted file mode 100644 index b907408..0000000 --- a/torchcsprng/csrc/cpu/kernels.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include - -namespace torch { -namespace csprng { -namespace cpu { - -#include "../kernels_decls.inc" - -}}} diff --git a/torchcsprng/csrc/csprng.cpp b/torchcsprng/csrc/csprng.cpp deleted file mode 100644 index 8ae61eb..0000000 --- a/torchcsprng/csrc/csprng.cpp +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include -#include - -#include -#include -#include - -#include "kernels_commons.h" -#include "cpu/kernels.h" -#ifdef WITH_CUDA -#include "cuda/kernels.cuh" -#endif - -using namespace at; -using namespace torch::csprng; - -static const auto GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE = "generator does not support tensor device type"; -static const auto TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED = "tensor device type is not supported"; - -// ==================================================== Random ======================================================== - -Tensor& random_(Tensor& self, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::random_(self, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_(self, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor& random_from_to(Tensor& self, int64_t from, optional to, - c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::random_from_to(self, from, to, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_from_to(self, from, to, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor& random_to(Tensor& self, int64_t to, - c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::random_to(self, to, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_to(self, to, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Uniform ======================================================= - -Tensor& uniform_(Tensor& self, double from, double to, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return 
cpu::uniform_(self, from, to, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::uniform_(self, from, to, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Normal ======================================================== - -Tensor& normal_(Tensor& self, double mean, double std, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::normal_(self, mean, std, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_(self, mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor& normal_Tensor_float_out(const Tensor& mean, double std, c10::optional gen, Tensor& output) { - if (output.device().type() == DeviceType::CPU) { - return cpu::normal_Tensor_float_out(output, mean, std, gen); -#ifdef WITH_CUDA - } else if (output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_float_out(output, mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor& normal_float_Tensor_out(double mean, const Tensor& std, c10::optional gen, Tensor& output) { - if (output.device().type() == DeviceType::CPU) { - return cpu::normal_float_Tensor_out(output, mean, std, gen); -#ifdef WITH_CUDA - } else if (output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_float_Tensor_out(output, mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor& normal_Tensor_Tensor_out(const Tensor& mean, const Tensor& std, c10::optional gen, Tensor& output) { - if (output.device().type() == DeviceType::CPU) { - return cpu::normal_Tensor_Tensor_out(output, mean, std, gen); -#ifdef WITH_CUDA - } else if 
(output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_Tensor_out(output, mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_Tensor_float(const Tensor& mean, double std, c10::optional gen) { - if (mean.device().type() == DeviceType::CPU) { - return cpu::normal_Tensor_float(mean, std, gen); -#ifdef WITH_CUDA - } else if (mean.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_float(mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_float_Tensor(double mean, const Tensor& std, c10::optional gen) { - if (std.device().type() == DeviceType::CPU) { - return cpu::normal_float_Tensor(mean, std, gen); -#ifdef WITH_CUDA - } else if (std.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_float_Tensor(mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_Tensor_Tensor(const Tensor& mean, const Tensor& std, c10::optional gen) { - if (mean.device().type() == DeviceType::CPU) { - return cpu::normal_Tensor_Tensor(mean, std, gen); -#ifdef WITH_CUDA - } else if (mean.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_Tensor(mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Cauchy ======================================================== - -Tensor& cauchy_(Tensor& self, double median, double sigma, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::cauchy_(self, median, sigma, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::cauchy_(self, median, sigma, gen); -#endif - } else { - TORCH_CHECK(false, 
GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== LogNormal ======================================================= - -Tensor& log_normal_(Tensor& self, double mean, double std, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::log_normal_(self, mean, std, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::log_normal_(self, mean, std, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== Geometric ======================================================= - -Tensor& geometric_(Tensor& self, double p, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::geometric_(self, p, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::geometric_(self, p, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== Exponential ===================================================== - -Tensor& exponential_(Tensor& self, double lambda, c10::optional gen) { - if (self.device().type() == DeviceType::CPU) { - return cpu::exponential_(self, lambda, gen); -#ifdef WITH_CUDA - } else if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::exponential_(self, lambda, gen); -#endif - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// =============================================== Random permutation ================================================= - -// randperm implementation was copied from PyTorch to unblock CSPRNG users, but ultimately CSPRNG must reuse -// refactored randperm from PyTorch, see https://github.com/pytorch/pytorch/issues/43816 - -namespace { - - inline void 
check_supported_max_int_with_precision(int64_t n, const Tensor& tensor) { - TORCH_CHECK(at::scalar_tensor(n, tensor.options()).defined(), - "n is too large for result tensor type: '", tensor.toString(), "'"); - - // Ensure sufficient precision for floating point representation. - switch (tensor.scalar_type()) { - case at::ScalarType::Half: - TORCH_CHECK(n <= (int64_t(1) << 11) + 1, "n cannot be greater than 2049 for Half type."); - break; - case at::ScalarType::Float: - TORCH_CHECK(n <= (int64_t(1) << 24) + 1, "n cannot be greater than 2^24+1 for Float type."); - break; - case at::ScalarType::Double: // Unlikely to happen, but doesn't hurt to check - TORCH_CHECK(n <= (int64_t(1) << 53) + 1, "n cannot be greater than 2^53+1 for Double type."); - break; - default: - break; - } - } - - template - void randperm(Tensor& result, int64_t n, c10::optional generator) { - auto gen = at::check_generator(generator); - scalar_t *r__data = result.data_ptr(); - - result.resize_({n}); - int64_t r__stride_0 = result.stride(0); - - at::parallel_for(0, n, internal::GRAIN_SIZE, - [&r__data, &r__stride_0](int64_t p_begin, int64_t p_end) { - for(int64_t i = p_begin; i < p_end; i++) - r__data[i*r__stride_0] = static_cast(i); - }); - - for(int64_t i = 0; i < n - 1; i++) - { - int64_t z = gen->random() % (n-i); - scalar_t sav = r__data[i*r__stride_0]; - r__data[i*r__stride_0] = r__data[(z+i)*r__stride_0]; - r__data[(z+i)*r__stride_0] = sav; - } - } -} // namespace - -Tensor& randperm_generator_out(int64_t n, c10::optional generator, Tensor& result) { - TORCH_CHECK(n >= 0, "n must be non-negative, got", n); - check_supported_max_int_with_precision(n, result); - if (result.device().type() == at::kCUDA) { - auto result_cpu = at::empty({n}, result.options().device(kCPU)); - randperm_generator_out(n, generator, result_cpu); - result.resize_({n}); - return result.copy_(result_cpu); - } - result.resize_({n}); - // See Note [Acquire lock when using random generators] - std::lock_guard 
lock(generator->mutex()); - AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, result.scalar_type(), "randperm", [&]() -> void { - randperm(result, n, generator); - }); - return result; -} - -// ================================================Encrypt/Decrypt===================================================== - -Tensor encrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode) { - if (input.device().type() == DeviceType::CPU) { - return cpu::encrypt(input, output, key, cipher, mode); -#ifdef WITH_CUDA - } else if (input.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::encrypt(input, output, key, cipher, mode); -#endif - } else { - TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); - } -} - -Tensor decrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode) { - if (input.device().type() == DeviceType::CPU) { - return cpu::decrypt(input, output, key, cipher, mode); -#ifdef WITH_CUDA - } else if (input.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::decrypt(input, output, key, cipher, mode); -#endif - } else { - TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); - } -} - -// ==================================================================================================================== - -Generator create_random_device_generator(c10::optional token = c10::nullopt) { - if (token.has_value()) { - return make_generator(*token); - } else { - return make_generator(true); - } -} - -Generator create_mt19937_generator(c10::optional seed = c10::nullopt) { - if (seed.has_value()) { - return make_generator(*seed); - } else { - return make_generator(false); - } -} - -bool supports_cuda() { -#ifdef WITH_CUDA - return true; -#else - return false; -#endif -} - -TORCH_LIBRARY_IMPL(aten, CustomRNGKeyId, m) { - // Random - m.impl("random_.from", random_from_to); - m.impl("random_.to", random_to); - m.impl("random_", random_); - // 
Uniform - m.impl("uniform_", uniform_); - // Normal - m.impl("normal_", normal_); - m.impl("normal.Tensor_float_out", normal_Tensor_float_out); - m.impl("normal.float_Tensor_out", normal_float_Tensor_out); - m.impl("normal.Tensor_Tensor_out", normal_Tensor_Tensor_out); - m.impl("normal.Tensor_float", normal_Tensor_float); - m.impl("normal.float_Tensor", normal_float_Tensor); - m.impl("normal.Tensor_Tensor", normal_Tensor_Tensor); - // Cauchy - m.impl("cauchy_", cauchy_); - // LogNormal - m.impl("log_normal_", log_normal_); - // Geometric - m.impl("geometric_", geometric_); - // Exponential - m.impl("exponential_", exponential_); - // Random permutation - m.impl("randperm.generator_out", randperm_generator_out); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("supports_cuda", &supports_cuda); - m.def("create_random_device_generator", &create_random_device_generator, py::arg("token") = nullptr); - m.def("create_mt19937_generator", &create_mt19937_generator, py::arg("seed") = nullptr); - m.def("encrypt", &encrypt_pybind); - m.def("decrypt", &decrypt_pybind); -} diff --git a/torchcsprng/csrc/cuda/kernels.cu b/torchcsprng/csrc/cuda/kernels.cu deleted file mode 100644 index da57d0a..0000000 --- a/torchcsprng/csrc/cuda/kernels.cu +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../kernels_commons.h" - -namespace torch { -namespace csprng { -namespace cuda { - -#include "../kernels_body.inc" - -}}} diff --git a/torchcsprng/csrc/cuda/kernels.cuh b/torchcsprng/csrc/cuda/kernels.cuh deleted file mode 100644 index 586f1fd..0000000 --- a/torchcsprng/csrc/cuda/kernels.cuh +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 
- * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include - -namespace torch { -namespace csprng { -namespace cuda { - -#include "../kernels_decls.inc" - -}}} diff --git a/torchcsprng/csrc/kernels_body.inc b/torchcsprng/csrc/kernels_body.inc deleted file mode 100644 index 097ae09..0000000 --- a/torchcsprng/csrc/kernels_body.inc +++ /dev/null @@ -1,437 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "aes.inc" - -// Generates `block_t_size`-bytes random key Tensor on CPU -// using `generator`, which must be an instance of `at::CPUGeneratorImpl` -// and passes it to the `device`. -template -at::Tensor key_tensor(size_t block_t_size, c10::optional generator) { - std::lock_guard lock(generator->mutex()); - auto gen = at::check_generator(generator); - auto key = torch::empty({static_cast(block_t_size)}, torch::kUInt8); - using random_t = typename std::result_of::type; - constexpr size_t random_t_size = sizeof(random_t); - for (size_t i = 0; i < block_t_size / random_t_size; i++) { - const auto rand = gen->random(); - for (size_t j = 0; j < random_t_size; j++) { - size_t k = i * random_t_size + j; - key[k] = static_cast((rand >> (j * 8)) & 0xff); - } - } - return key; -} - -template -at::Tensor aes128_key_tensor(at::Generator generator) { - return key_tensor(aes::block_t_size, generator); -} - -// ==================================================================================================================== - -// A simple container for random state sub-blocks that implements RNG interface -// with random() and random64() methods, that are used by transformation function -template -struct RNGValues { - TORCH_CSPRNG_HOST_DEVICE RNGValues(uint64_t* vals) { - 
memcpy(&vals_, vals, size * sizeof(uint64_t)); - } - uint32_t TORCH_CSPRNG_HOST_DEVICE random() { auto res = static_cast(vals_[index]); index++; return res; } - uint64_t TORCH_CSPRNG_HOST_DEVICE random64() { auto res = vals_[index]; index++; return res; } -private: - uint64_t vals_[size]; - int index = 0; -}; - -// Applies AES in CTR mode with the `key` for passed TensorIterator iter. -// `scalar_t` is a scalar type equivalent of target tensor dtype -// `uint_t` is an unsigned integral type of sub-blocks that random state is divided to -// (e.g, 16 bytes random state block can be divided into 16 uint8_t sub-blocks -// or 8 uint16_t sub-block or 4 uint32_t sub-block or 2 uint64_t sub-blocks) -// `N` is a number of sub-block which is used by `transform_func` -// to generate a random value of specific distribution (e.g. `normal` uses 2) -// `key` is a CUDA pointer to random key memory block -// `transform_func` is a callable that converts N `uint_t` random state sub-blocks passed in RNGValues into target dtype `scalar_t` -template -void aes_helper(at::TensorIterator& iter, const uint8_t* key_bytes, transform_t transform_func) { - auto output = iter.tensor(0); - const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); - const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE (uint32_t li) -> uint32_t { - return output_offset_calc.get(li)[0]; - }; - torch::csprng::block_cipher( - nullptr, 0, 0, output_index_calc, - output.data_ptr(), output.numel(), output.element_size(), output_index_calc, - iter.device_type(), - [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { - uint8_t idx_block[aes::block_t_size]; - std::memset(&idx_block, 0, aes::block_t_size); - *(reinterpret_cast(idx_block)) = idx; - aes::encrypt(idx_block, key_bytes); - for (size_t i = 0; i < aes::block_t_size; i++) { - block[i] ^= idx_block[i]; - } - }, - aes::block_t_size / (N * sizeof(uint_t)), - [transform_func] 
TORCH_CSPRNG_HOST_DEVICE (uint8_t* block) { - const auto n = aes::block_t_size / (N * sizeof(uint_t)); - for (size_t i = 0; i < n; ++i) { - uint64_t vals[N]; - for (size_t j = 0; j < N; ++j) { - vals[j] = (reinterpret_cast(block))[N * i + j]; - } - RNGValues rng(vals); - reinterpret_cast(block)[i] = transform_func(&rng); - } - } - ); -} - -// ==================================================================================================================== - -// A mapping between scalar type and corresponding unsigned integer type of random state sub-block. -// uint64_t for double and long, uint32_t for the rest -template -struct UIntType {}; - -template <> struct UIntType { using type = uint64_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint16_t; }; -template <> struct UIntType { using type = uint16_t; }; -template <> struct UIntType { using type = uint64_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; - -// ==================================================== Random ======================================================== - -template -struct RandomKernel { - void operator()(TensorIterator& iter, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_kernel", [&] { - aes_helper::type>(iter, key, - [] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_int_distribution random; - return random(generator); - } - ); - }); - } -}; - -template -void random_from_to_kernel_helper(TensorIterator& iter, uint64_t range, int64_t base, const uint8_t* key) 
{ - aes_helper(iter, key, - [range, base] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_int_from_to_distribution random(range, base); - return random(generator); - } - ); -} - -template -void random_full_range_kernel_helper(TensorIterator& iter, const uint8_t* key) { - aes_helper(iter, key, - [] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_int_full_range_distribution random; - return random(generator); - } - ); -} - -template -struct RandomFromToKernel { - void operator()(TensorIterator& iter, uint64_t range, int64_t base, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_from_to_kernel", [&] { - if (( - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value)/* TODO: && range >= 1ULL << 32*/) - { - random_from_to_kernel_helper(iter, range, base, key); - } else { - random_from_to_kernel_helper(iter, range, base, key); - } - }); - } - void operator()(TensorIterator& iter, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_full_64_bits_range_kernel", [&] { - if (std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value) - { - random_full_range_kernel_helper(iter, key); - } else { - TORCH_CHECK(false, "random_full_64_bits_range_kernel_cuda handles only int64, double, float and bfloat16"); - } - }); - } -}; - -at::Tensor& random_(at::Tensor& self, c10::optional generator) { - return at::native::templates::random_impl(self, generator); -} - -at::Tensor& random_from_to(at::Tensor& self, int64_t from, c10::optional to, c10::optional 
generator) { - return at::native::templates::random_from_to_impl(self, from, to, generator); -} - -at::Tensor& random_to(at::Tensor& self, int64_t to, c10::optional generator) { - return random_from_to(self, 0, to, generator); -} - -// ==================================================== Uniform ======================================================= - -template -struct UniformKernel { - void operator()(TensorIterator& iter, double from, double to, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "uniform_kernel", [&] { - aes_helper(iter, key, - [from, to] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_real_distribution uniform(from, to); - return static_cast(uniform(generator)); - } - ); - }); - } -}; - -at::Tensor& uniform_(at::Tensor& self, double from, double to, c10::optional generator) { - return at::native::templates::uniform_impl_(self, from, to, generator); -} - -// ==================================================== Normal ======================================================== - -template -struct NormalKernel { - void operator()(Tensor& self, double mean, double std, c10::optional generator) { - auto iter = TensorIterator::nullary_op(self); - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "normal_kernel", [&] { - aes_helper(iter, key, - [mean, std] TORCH_CSPRNG_HOST_DEVICE (RNGValues<2>* gen) -> scalar_t { - normal_distribution normal(mean, std); - return static_cast(normal(gen)); - } - ); - }); - } -}; - -at::Tensor& normal_(at::Tensor& self, double mean, double std, c10::optional generator) { - return at::native::templates::normal_impl_(self, mean, std, generator); -} - -at::Tensor& 
normal_Tensor_float_out(at::Tensor& output, const at::Tensor& mean, double std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor& normal_float_Tensor_out(at::Tensor& output, double mean, const at::Tensor& std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor& normal_Tensor_Tensor_out(at::Tensor& output, const at::Tensor& mean, const at::Tensor& std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor normal_Tensor_float(const at::Tensor& mean, double std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -at::Tensor normal_float_Tensor(double mean, const at::Tensor& std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -at::Tensor normal_Tensor_Tensor(const at::Tensor& mean, const at::Tensor& std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -// ==================================================== Cauchy ======================================================== - -template -struct CauchyKernel { - void operator()(TensorIterator& iter, double median, double sigma, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "cauchy_kernel", [&] { - aes_helper(iter, key, - [median, sigma] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { - cauchy_distribution cauchy(median, sigma); - return static_cast(cauchy(gen)); - } - ); - }); - } -}; - -at::Tensor& cauchy_(at::Tensor& self, double median, double sigma, c10::optional generator) { - return at::native::templates::cauchy_impl_(self, median, sigma, generator); -} - -// ================================================== LogNormal 
======================================================= - -template -struct LogNormalKernel { - void operator()(TensorIterator& iter, double mean, double std, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "log_normal", [&] { - aes_helper(iter, key, - [mean, std] TORCH_CSPRNG_HOST_DEVICE (RNGValues<2>* gen) -> scalar_t { - lognormal_distribution logNormal(mean, std); - return static_cast(logNormal(gen)); - } - ); - }); - } -}; - -at::Tensor& log_normal_(at::Tensor& self, double mean, double std, c10::optional gen) { - return at::native::templates::log_normal_impl_(self, mean, std, gen); -} - -// ================================================== Geometric ======================================================= - -template -struct GeometricKernel { - void operator()(TensorIterator& iter, double p, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "geometric_kernel", [&] { - aes_helper::type, 1>(iter, key, - [p] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { - geometric_distribution geometric(p); - return geometric(gen); - } - ); - }); - } -}; - -at::Tensor& geometric_(at::Tensor& self, double p, c10::optional gen) { - return at::native::templates::geometric_impl_(self, p, gen); -} - -// ================================================== Exponential ===================================================== - -template -struct ExponentialKernel { - void operator()(TensorIterator& iter, double lambda, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, 
at::ScalarType::BFloat16, iter.dtype(), "exponential_kernel", [&] { - aes_helper(iter, key, - [lambda] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { - exponential_distribution exponential(lambda); - return static_cast(exponential(gen)); - } - ); - }); - } -}; - -at::Tensor& exponential_(at::Tensor& self, double lambda, c10::optional gen) { - return at::native::templates::exponential_impl_(self, lambda, gen); -} - -// ================================================Encrypt/Decrypt===================================================== - -void check_cipher(const std::string& cipher, Tensor key) { - if (cipher == "aes128") { - TORCH_CHECK(key.element_size() * key.numel() == 16, "key tensor must have 16 bytes(128 bits)"); - } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"aes128\" cipher, \"", cipher, "\" is not supported."); - } -} - -void aes_ecb_encrypt(Tensor input, Tensor output, uint8_t* key_bytes) { - block_cipher(input, output, - [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { - aes::encrypt(block, key_bytes); - } - ); -} - -void aes_ecb_decrypt(Tensor input, Tensor output, uint8_t* key_bytes) { - block_cipher(input, output, - [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { - aes::decrypt(block, key_bytes); - } - ); -} - -void aes_ctr_encrypt(Tensor input, Tensor output, uint8_t* key_bytes) { - block_cipher(input, output, - [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { - uint8_t idx_block[aes::block_t_size]; - std::memset(&idx_block, 0, aes::block_t_size); - *(reinterpret_cast(idx_block)) = idx; - aes::encrypt(idx_block, key_bytes); - for (size_t i = 0; i < aes::block_t_size; i++) { - block[i] ^= idx_block[i]; - } - } - ); -} - -void aes_ctr_decrypt(Tensor input, Tensor output, uint8_t* key_bytes) { - aes_ctr_encrypt(input, output, key_bytes); -} - -Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& 
mode) { - TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), "input, output and key tensors must have the same device"); - const auto output_size_bytes = output.numel() * output.itemsize(); - const auto input_size_bytes = input.numel() * input.itemsize(); - const auto input_size_bytes_rounded = (input_size_bytes + aes::block_t_size - 1) / aes::block_t_size * aes::block_t_size; - TORCH_CHECK(output_size_bytes == input_size_bytes_rounded, - "output size in bytes(", output_size_bytes, - ") is not equal to input size in bytes rounded to block size(", - input_size_bytes_rounded, ")"); - check_cipher(cipher, key); - const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); - if (mode == "ecb") { - aes_ecb_encrypt(input, output, key_bytes); - } else if (mode == "ctr") { - aes_ctr_encrypt(input, output, key_bytes); - } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); - } - return output; -} - -Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode) { - TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), "input, output and key tensors must have the same device"); - const auto output_size_bytes = output.numel() * output.itemsize(); - const auto input_size_bytes = input.numel() * input.itemsize(); - const auto diff = input_size_bytes - output_size_bytes; - TORCH_CHECK(0 <= diff && diff < aes::block_t_size, "output tensor size in bytes must be less then or equal to input tensor size in bytes, the difference must be less than block size"); - TORCH_CHECK(input_size_bytes % aes::block_t_size == 0, "input tensor size in bytes must divisible by cipher block size in bytes"); - check_cipher(cipher, key); - const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); - if (mode == "ecb") { - aes_ecb_decrypt(input, output, key_bytes); - } else if (mode == "ctr") { - aes_ctr_decrypt(input, 
output, key_bytes); - } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); - } - return output; -} diff --git a/torchcsprng/csrc/kernels_commons.h b/torchcsprng/csrc/kernels_commons.h deleted file mode 100644 index 3e74d35..0000000 --- a/torchcsprng/csrc/kernels_commons.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include "macros.h" -#include "block_cipher.h" - -inline uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) { - return (static_cast(hi) << 32) | lo; -} - -// CUDA CSPRNG is actually CPU generator which is used only to generate a random key on CPU for AES running in a block mode on CUDA -struct CSPRNGGeneratorImpl : public c10::GeneratorImpl { - CSPRNGGeneratorImpl(bool use_rd) : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, use_rd_{use_rd} {} - CSPRNGGeneratorImpl(const std::string& token) : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, use_rd_{true}, rd_{token} {} - CSPRNGGeneratorImpl(uint64_t seed) : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, use_rd_{false}, mt_{static_cast(seed)} { } - ~CSPRNGGeneratorImpl() = default; - uint32_t random() { return use_rd_ ? rd_() : mt_(); } - uint64_t random64() { return use_rd_ ? 
make64BitsFrom32Bits(rd_(), rd_()) : make64BitsFrom32Bits(mt_(), mt_()); } - - void set_current_seed(uint64_t seed) override { throw std::runtime_error("not implemented"); } - uint64_t current_seed() const override { throw std::runtime_error("not implemented"); } - uint64_t seed() override { throw std::runtime_error("not implemented"); } - CSPRNGGeneratorImpl* clone_impl() const override { throw std::runtime_error("not implemented"); } - - static at::DeviceType device_type() { return at::DeviceType::CPU; } - - void set_state(const c10::TensorImpl& new_state) override { throw std::runtime_error("not implemented"); } - c10::intrusive_ptr get_state() const override { throw std::runtime_error("not implemented"); } - - void set_offset(uint64_t offset) override { throw std::runtime_error("not implemented"); } - uint64_t get_offset() const override { throw std::runtime_error("not implenented"); } - bool use_rd_; - std::random_device rd_; - std::mt19937 mt_; -}; diff --git a/torchcsprng/csrc/kernels_decls.inc b/torchcsprng/csrc/kernels_decls.inc deleted file mode 100644 index 5fa9299..0000000 --- a/torchcsprng/csrc/kernels_decls.inc +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -// ==================================================== Random ======================================================== - -at::Tensor& random_(at::Tensor& self, c10::optional generator); - -at::Tensor& random_from_to(at::Tensor& self, int64_t from, optional to, c10::optional generator); - -at::Tensor& random_to(at::Tensor& self, int64_t to, c10::optional generator); - -// ==================================================== Uniform ======================================================= - -at::Tensor& uniform_(at::Tensor& self, double from, double to, c10::optional generator); - -// ==================================================== Normal ======================================================== - -at::Tensor& normal_(at::Tensor& self, double mean, double std, c10::optional generator); - -at::Tensor& normal_Tensor_float_out(at::Tensor& output, const at::Tensor& mean, double std, c10::optional gen); - -at::Tensor& normal_float_Tensor_out(at::Tensor& output, double mean, const at::Tensor& std, c10::optional gen); - -at::Tensor& normal_Tensor_Tensor_out(at::Tensor& output, const at::Tensor& mean, const at::Tensor& std, c10::optional gen); - -at::Tensor normal_Tensor_float(const at::Tensor& mean, double std, c10::optional gen); - -at::Tensor normal_float_Tensor(double mean, const at::Tensor& std, c10::optional gen); - -at::Tensor normal_Tensor_Tensor(const at::Tensor& mean, const at::Tensor& std, c10::optional gen); - -// ==================================================== Cauchy ======================================================== - -at::Tensor& cauchy_(at::Tensor& self, double median, double sigma, c10::optional generator); - -// ================================================== LogNormal ======================================================= - -at::Tensor& log_normal_(at::Tensor& self, double mean, double std, c10::optional gen); - -// ================================================== Geometric 
======================================================= - -at::Tensor& geometric_(at::Tensor& self, double p, c10::optional gen); - -// ================================================== Exponential ===================================================== - -at::Tensor& exponential_(at::Tensor& self, double lambda, c10::optional gen); - -// ================================================Encrypt/Decrypt===================================================== - -Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode); - -Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode); diff --git a/torchcsprng/kernels.cu b/torchcsprng/kernels.cu new file mode 100644 index 0000000..65bb01f --- /dev/null +++ b/torchcsprng/kernels.cu @@ -0,0 +1,484 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "kernels_commons.h" + +namespace torch { +namespace csprng { +namespace cuda { + +// The original kernels_body.inc starts here + +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "aes.h" + +// Generates `block_t_size`-bytes random key Tensor on CPU +// using `generator`, which must be an instance of `at::CPUGeneratorImpl` +// and passes it to the `device`. 
+template +at::Tensor key_tensor(size_t block_t_size, c10::optional generator) { + std::lock_guard lock(generator->mutex()); + auto gen = at::check_generator(generator); + auto key = torch::empty({static_cast(block_t_size)}, torch::kUInt8); + using random_t = typename std::result_of::type; + constexpr size_t random_t_size = sizeof(random_t); + for (size_t i = 0; i < block_t_size / random_t_size; i++) { + const auto rand = gen->random(); + for (size_t j = 0; j < random_t_size; j++) { + size_t k = i * random_t_size + j; + key[k] = static_cast((rand >> (j * 8)) & 0xff); + } + } + return key; +} + +template +at::Tensor aes128_key_tensor(at::Generator generator) { + return key_tensor(aes::block_t_size, generator); +} + +// ==================================================================================================================== + +// A simple container for random state sub-blocks that implements RNG interface +// with random() and random64() methods, that are used by transformation function +template +struct RNGValues { + TORCH_CSPRNG_HOST_DEVICE RNGValues(uint64_t *vals) { + memcpy(&vals_, vals, size * sizeof(uint64_t)); + } + uint32_t TORCH_CSPRNG_HOST_DEVICE random() { + auto res = static_cast(vals_[index]); + index++; + return res; + } + uint64_t TORCH_CSPRNG_HOST_DEVICE random64() { + auto res = vals_[index]; + index++; + return res; + } + + private: + uint64_t vals_[size]; + int index = 0; +}; + +// Applies AES in CTR mode with the `key` for passed TensorIterator iter. +// `scalar_t` is a scalar type equivalent of target tensor dtype +// `uint_t` is an unsigned integral type of sub-blocks that random state is divided to +// (e.g, 16 bytes random state block can be divided into 16 uint8_t sub-blocks +// or 8 uint16_t sub-block or 4 uint32_t sub-block or 2 uint64_t sub-blocks) +// `N` is a number of sub-block which is used by `transform_func` +// to generate a random value of specific distribution (e.g. 
`normal` uses 2) +// `key` is a CUDA pointer to random key memory block +// `transform_func` is a callable that converts N `uint_t` random state sub-blocks passed in RNGValues into target dtype +// `scalar_t` +template +void aes_helper(at::TensorIterator &iter, const uint8_t *key_bytes, transform_t transform_func) { + auto output = iter.tensor(0); + const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); + const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE( + uint32_t li) -> uint32_t { return output_offset_calc.get(li)[0]; }; + torch::csprng::block_cipher( + nullptr, 0, 0, output_index_calc, output.data_ptr(), output.numel(), output.element_size(), output_index_calc, + iter.device_type(), + [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { + uint8_t idx_block[aes::block_t_size]; + std::memset(&idx_block, 0, aes::block_t_size); + *(reinterpret_cast(idx_block)) = idx; + aes::encrypt(idx_block, key_bytes); + for (size_t i = 0; i < aes::block_t_size; i++) { + block[i] ^= idx_block[i]; + } + }, + aes::block_t_size / (N * sizeof(uint_t)), + [transform_func] TORCH_CSPRNG_HOST_DEVICE(uint8_t * block) { + const auto n = aes::block_t_size / (N * sizeof(uint_t)); + for (size_t i = 0; i < n; ++i) { + uint64_t vals[N]; + for (size_t j = 0; j < N; ++j) { + vals[j] = (reinterpret_cast(block))[N * i + j]; + } + RNGValues rng(vals); + reinterpret_cast(block)[i] = transform_func(&rng); + } + }); +} + +// ==================================================================================================================== + +// A mapping between scalar type and corresponding unsigned integer type of random state sub-block. 
+// uint64_t for double and long, uint32_t for the rest +template +struct UIntType {}; + +template <> +struct UIntType { + using type = uint64_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; +template <> +struct UIntType { + using type = uint16_t; +}; +template <> +struct UIntType { + using type = uint16_t; +}; +template <> +struct UIntType { + using type = uint64_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; +template <> +struct UIntType { + using type = uint32_t; +}; + +// ==================================================== Random ======================================================== + +template +struct RandomKernel { + void operator()(TensorIterator &iter, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_ALL_TYPES_AND3( + at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_kernel", [&] { + aes_helper::type>( + iter, key, [] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { + uniform_int_distribution random; + return random(generator); + }); + }); + } +}; + +template +void random_from_to_kernel_helper(TensorIterator &iter, uint64_t range, int64_t base, const uint8_t *key) { + aes_helper(iter, key, [range, base] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { + uniform_int_from_to_distribution random(range, base); + return random(generator); + }); +} + +template +void random_full_range_kernel_helper(TensorIterator &iter, const uint8_t *key) { + aes_helper(iter, key, [] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { + uniform_int_full_range_distribution random; + return random(generator); + }); +} + +template +struct RandomFromToKernel { + void 
operator()(TensorIterator &iter, uint64_t range, int64_t base, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_ALL_TYPES_AND3( + at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_from_to_kernel", [&] { + if ((std::is_same::value || std::is_same::value || + std::is_same::value || + std::is_same::value) /* TODO: && range >= 1ULL << 32*/) { + random_from_to_kernel_helper(iter, range, base, key); + } else { + random_from_to_kernel_helper(iter, range, base, key); + } + }); + } + void operator()(TensorIterator &iter, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), + "random_full_64_bits_range_kernel", [&] { + if (std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value) { + random_full_range_kernel_helper(iter, key); + } else { + TORCH_CHECK(false, "random_full_64_bits_range_kernel_cuda handles only int64, double, float and bfloat16"); + } + }); + } +}; + +at::Tensor &random_(at::Tensor &self, c10::optional generator) { + return at::native::templates::random_impl(self, generator); +} + +at::Tensor &random_from_to( + at::Tensor &self, int64_t from, c10::optional to, c10::optional generator) { + return at::native::templates::random_from_to_impl(self, from, to, generator); +} + +at::Tensor &random_to(at::Tensor &self, int64_t to, c10::optional generator) { + return random_from_to(self, 0, to, generator); +} + +// ==================================================== Uniform ======================================================= + +template +struct UniformKernel { + void operator()(TensorIterator &iter, double from, double to, c10::optional generator) { + const Tensor key_t = 
aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "uniform_kernel", [&] { + aes_helper( + iter, key, [from, to] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { + uniform_real_distribution uniform(from, to); + return static_cast(uniform(generator)); + }); + }); + } +}; + +at::Tensor &uniform_(at::Tensor &self, double from, double to, c10::optional generator) { + return at::native::templates::uniform_impl_(self, from, to, generator); +} + +// ==================================================== Normal ======================================================== + +template +struct NormalKernel { + void operator()(Tensor &self, double mean, double std, c10::optional generator) { + auto iter = TensorIterator::nullary_op(self); + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "normal_kernel", [&] { + aes_helper( + iter, key, [mean, std] TORCH_CSPRNG_HOST_DEVICE(RNGValues<2> * gen) -> scalar_t { + normal_distribution normal(mean, std); + return static_cast(normal(gen)); + }); + }); + } +}; + +at::Tensor &normal_(at::Tensor &self, double mean, double std, c10::optional generator) { + return at::native::templates::normal_impl_(self, mean, std, generator); +} + +at::Tensor &normal_Tensor_float_out( + at::Tensor &output, const at::Tensor &mean, double std, c10::optional gen) { + return at::native::templates::normal_out_impl(output, mean, std, gen); +} + +at::Tensor &normal_float_Tensor_out( + at::Tensor &output, double mean, const at::Tensor &std, c10::optional gen) { + return at::native::templates::normal_out_impl(output, mean, std, gen); +} + +at::Tensor &normal_Tensor_Tensor_out( + at::Tensor &output, const at::Tensor &mean, const at::Tensor &std, c10::optional gen) { + 
return at::native::templates::normal_out_impl(output, mean, std, gen); +} + +at::Tensor normal_Tensor_float(const at::Tensor &mean, double std, c10::optional gen) { + return at::native::templates::normal_impl(mean, std, gen); +} + +at::Tensor normal_float_Tensor(double mean, const at::Tensor &std, c10::optional gen) { + return at::native::templates::normal_impl(mean, std, gen); +} + +at::Tensor normal_Tensor_Tensor(const at::Tensor &mean, const at::Tensor &std, c10::optional gen) { + return at::native::templates::normal_impl(mean, std, gen); +} + +// ==================================================== Cauchy ======================================================== + +template +struct CauchyKernel { + void operator()(TensorIterator &iter, double median, double sigma, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "cauchy_kernel", [&] { + aes_helper( + iter, key, [median, sigma] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { + cauchy_distribution cauchy(median, sigma); + return static_cast(cauchy(gen)); + }); + }); + } +}; + +at::Tensor &cauchy_(at::Tensor &self, double median, double sigma, c10::optional generator) { + return at::native::templates::cauchy_impl_(self, median, sigma, generator); +} + +// ================================================== LogNormal ======================================================= + +template +struct LogNormalKernel { + void operator()(TensorIterator &iter, double mean, double std, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "log_normal", [&] { + aes_helper( + iter, key, [mean, std] TORCH_CSPRNG_HOST_DEVICE(RNGValues<2> * gen) -> scalar_t { + 
lognormal_distribution logNormal(mean, std); + return static_cast(logNormal(gen)); + }); + }); + } +}; + +at::Tensor &log_normal_(at::Tensor &self, double mean, double std, c10::optional gen) { + return at::native::templates::log_normal_impl_(self, mean, std, gen); +} + +// ================================================== Geometric ======================================================= + +template +struct GeometricKernel { + void operator()(TensorIterator &iter, double p, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "geometric_kernel", [&] { + aes_helper::type, 1>( + iter, key, [p] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { + geometric_distribution geometric(p); + return geometric(gen); + }); + }); + } +}; + +at::Tensor &geometric_(at::Tensor &self, double p, c10::optional gen) { + return at::native::templates::geometric_impl_(self, p, gen); +} + +// ================================================== Exponential ===================================================== + +template +struct ExponentialKernel { + void operator()(TensorIterator &iter, double lambda, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "exponential_kernel", [&] { + aes_helper(iter, key, [lambda] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { + exponential_distribution exponential(lambda); + return static_cast(exponential(gen)); + }); + }); + } +}; + +at::Tensor &exponential_(at::Tensor &self, double lambda, c10::optional gen) { + return at::native::templates::exponential_impl_(self, lambda, gen); +} + +// 
================================================Encrypt/Decrypt===================================================== + +void check_cipher(const std::string &cipher, Tensor key) { + if (cipher == "aes128") { + TORCH_CHECK(key.element_size() * key.numel() == 16, "key tensor must have 16 bytes(128 bits)"); + } else { + TORCH_CHECK(false, "encrypt/decrypt supports \"aes128\" cipher, \"", cipher, "\" is not supported."); + } +} + +void aes_ecb_encrypt(Tensor input, Tensor output, uint8_t *key_bytes) { + block_cipher(input, output, + [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { aes::encrypt(block, key_bytes); }); +} + +void aes_ecb_decrypt(Tensor input, Tensor output, uint8_t *key_bytes) { + block_cipher(input, output, + [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { aes::decrypt(block, key_bytes); }); +} + +void aes_ctr_encrypt(Tensor input, Tensor output, uint8_t *key_bytes) { + block_cipher( + input, output, [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { + uint8_t idx_block[aes::block_t_size]; + std::memset(&idx_block, 0, aes::block_t_size); + *(reinterpret_cast(idx_block)) = idx; + aes::encrypt(idx_block, key_bytes); + for (size_t i = 0; i < aes::block_t_size; i++) { + block[i] ^= idx_block[i]; + } + }); +} + +void aes_ctr_decrypt(Tensor input, Tensor output, uint8_t *key_bytes) { + aes_ctr_encrypt(input, output, key_bytes); +} + +Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { + TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), + "input, output and key tensors must have the same device"); + const auto output_size_bytes = output.numel() * output.itemsize(); + const auto input_size_bytes = input.numel() * input.itemsize(); + const auto input_size_bytes_rounded = + (input_size_bytes + aes::block_t_size - 1) / aes::block_t_size * aes::block_t_size; + TORCH_CHECK(output_size_bytes == 
input_size_bytes_rounded, "output size in bytes(", output_size_bytes, + ") is not equal to input size in bytes rounded to block size(", input_size_bytes_rounded, ")"); + check_cipher(cipher, key); + const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); + if (mode == "ecb") { + aes_ecb_encrypt(input, output, key_bytes); + } else if (mode == "ctr") { + aes_ctr_encrypt(input, output, key_bytes); + } else { + TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); + } + return output; +} + +Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { + TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), + "input, output and key tensors must have the same device"); + const auto output_size_bytes = output.numel() * output.itemsize(); + const auto input_size_bytes = input.numel() * input.itemsize(); + const auto diff = input_size_bytes - output_size_bytes; + TORCH_CHECK(0 <= diff && diff < aes::block_t_size, + "output tensor size in bytes must be less then or equal to input tensor size in bytes, the difference must be less " + "than block size"); + TORCH_CHECK(input_size_bytes % aes::block_t_size == 0, + "input tensor size in bytes must divisible by cipher block size in bytes"); + check_cipher(cipher, key); + const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); + if (mode == "ecb") { + aes_ecb_decrypt(input, output, key_bytes); + } else if (mode == "ctr") { + aes_ctr_decrypt(input, output, key_bytes); + } else { + TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); + } + return output; +} + +// The original kernels_body.inc ends here + +} // namespace cuda +} // namespace csprng +} // namespace torch diff --git a/torchcsprng/kernels.cuh b/torchcsprng/kernels.cuh new file mode 100644 index 0000000..4467e11 --- /dev/null +++ b/torchcsprng/kernels.cuh @@ -0,0 
+1,84 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace torch { +namespace csprng { +namespace cuda { + +// The original kernels_body.inc starts here + +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +// ==================================================== Random ======================================================== + +at::Tensor &random_(at::Tensor &self, c10::optional generator); + +at::Tensor &random_from_to( + at::Tensor &self, int64_t from, optional to, c10::optional generator); + +at::Tensor &random_to(at::Tensor &self, int64_t to, c10::optional generator); + +// ==================================================== Uniform ======================================================= + +at::Tensor &uniform_(at::Tensor &self, double from, double to, c10::optional generator); + +// ==================================================== Normal ======================================================== + +at::Tensor &normal_(at::Tensor &self, double mean, double std, c10::optional generator); + +at::Tensor &normal_Tensor_float_out( + at::Tensor &output, const at::Tensor &mean, double std, c10::optional gen); + +at::Tensor &normal_float_Tensor_out( + at::Tensor &output, double mean, const at::Tensor &std, c10::optional gen); + +at::Tensor &normal_Tensor_Tensor_out( + at::Tensor &output, const at::Tensor &mean, const at::Tensor &std, c10::optional gen); + +at::Tensor normal_Tensor_float(const at::Tensor &mean, double std, c10::optional gen); + +at::Tensor normal_float_Tensor(double mean, const at::Tensor &std, c10::optional gen); + +at::Tensor normal_Tensor_Tensor(const 
at::Tensor &mean, const at::Tensor &std, c10::optional gen); + +// ==================================================== Cauchy ======================================================== + +at::Tensor &cauchy_(at::Tensor &self, double median, double sigma, c10::optional generator); + +// ================================================== LogNormal ======================================================= + +at::Tensor &log_normal_(at::Tensor &self, double mean, double std, c10::optional gen); + +// ================================================== Geometric ======================================================= + +at::Tensor &geometric_(at::Tensor &self, double p, c10::optional gen); + +// ================================================== Exponential ===================================================== + +at::Tensor &exponential_(at::Tensor &self, double lambda, c10::optional gen); + +// ================================================Encrypt/Decrypt===================================================== + +Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); + +Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); + +// The original kernels_body.inc ends here + +} // namespace cuda +} // namespace csprng +} // namespace torch diff --git a/torchcsprng/kernels_commons.h b/torchcsprng/kernels_commons.h new file mode 100644 index 0000000..359ba2b --- /dev/null +++ b/torchcsprng/kernels_commons.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include "macros.h" +#include "block_cipher.h" + +inline uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) { + return (static_cast(hi) << 32) | lo; +} + +// CUDA CSPRNG is actually CPU generator which is used only to generate a random key on CPU for AES running in a block +// mode on CUDA +struct CSPRNGGeneratorImpl : public c10::GeneratorImpl { + CSPRNGGeneratorImpl(bool use_rd) + : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, + use_rd_{use_rd} {} + CSPRNGGeneratorImpl(const std::string &token) + : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, + use_rd_{true}, + rd_{token} {} + CSPRNGGeneratorImpl(uint64_t seed) + : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, + use_rd_{false}, + mt_{static_cast(seed)} {} + ~CSPRNGGeneratorImpl() = default; + uint32_t random() { + return use_rd_ ? rd_() : mt_(); + } + uint64_t random64() { + return use_rd_ ? 
make64BitsFrom32Bits(rd_(), rd_()) : make64BitsFrom32Bits(mt_(), mt_());
+  }
+
+  void set_current_seed(uint64_t seed) override {
+    throw std::runtime_error("not implemented");
+  }
+  uint64_t current_seed() const override {
+    throw std::runtime_error("not implemented");
+  }
+  uint64_t seed() override {
+    throw std::runtime_error("not implemented");
+  }
+  CSPRNGGeneratorImpl *clone_impl() const override {
+    throw std::runtime_error("not implemented");
+  }
+
+  static at::DeviceType device_type() {
+    return at::DeviceType::CPU;
+  }
+
+  void set_state(const c10::TensorImpl &new_state) override {
+    throw std::runtime_error("not implemented");
+  }
+  c10::intrusive_ptr get_state() const override {
+    throw std::runtime_error("not implemented");
+  }
+
+  void set_offset(uint64_t offset) override {
+    throw std::runtime_error("not implemented");
+  }
+  uint64_t get_offset() const override {
+    throw std::runtime_error("not implemented");
+  }
+  bool use_rd_;
+  std::random_device rd_;
+  std::mt19937 mt_;
+};
diff --git a/torchcsprng/csrc/macros.h b/torchcsprng/macros.h
similarity index 100%
rename from torchcsprng/csrc/macros.h
rename to torchcsprng/macros.h

From 32429599a19768474db1071491921af6f9190bc8 Mon Sep 17 00:00:00 2001
From: myl7
Date: Mon, 11 Dec 2023 16:01:22 +0800
Subject: [PATCH 03/10] Add wrapper and build config

Remove old build config. Move the version file.
--- CMakeLists.txt | 30 ++++ lib.cpp | 28 ++++ lib.h | 8 + setup.py | 193 ------------------------- torchcsprng/block_cipher.h | 2 + version.txt => torchcsprng/version.txt | 0 6 files changed, 68 insertions(+), 193 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 lib.cpp create mode 100644 lib.h delete mode 100644 setup.py rename version.txt => torchcsprng/version.txt (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..b5a151c --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (C) myl7 +# SPDX-License-Identifier: BSD-3-Clause + +cmake_minimum_required(VERSION 3.28) +project(fss-prg-cuda LANGUAGES CUDA CXX) +set(CMAKE_CXX_STANDARD 17) + +include(CheckLanguage) +check_language(CUDA) + +find_package(Torch REQUIRED) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") + +add_library( + fssprgcuda SHARED + lib.cpp + lib.h + torchcsprng/THCIntegerDivider.cuh + torchcsprng/aes.h + torchcsprng/kernels_commons.h + torchcsprng/kernels.cuh + torchcsprng/csprng.cpp + torchcsprng/block_cipher.h + torchcsprng/macros.h + torchcsprng/OffsetCalculator.cuh + torchcsprng/kernels.cu +) +set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +target_link_libraries(fssprgcuda "${TORCH_LIBRARIES}") +target_compile_options(fssprgcuda PRIVATE $<$: --expt-extended-lambda>) diff --git a/lib.cpp b/lib.cpp new file mode 100644 index 0000000..9f0ec19 --- /dev/null +++ b/lib.cpp @@ -0,0 +1,28 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +#include "lib.h" + +#include + +using torch::Tensor; + +extern Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); + +constexpr size_t block_t_size = 16; + +void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key) { + const auto input_size_bytes = buf_size; + TORCH_CHECK(input_size_bytes % block_t_size == 0, "input size in bytes(", input_size_bytes, + ") 
is not a multiple of block size(", block_t_size, ")"); + Tensor input = torch::from_blob(buf, {input_size_bytes}, torch::kUInt8).to(torch::kCUDA); + + const auto output_size_bytes = input_size_bytes; + Tensor output = torch::empty({output_size_bytes}, torch::kUInt8); + + const auto key_size_bytes = 16; + Tensor key_tensor = torch::from_blob(const_cast(key), {key_size_bytes}, torch::kUInt8).to(torch::kCUDA); + + encrypt(input, output, key_tensor, "aes128", "ecb"); + input ^= output; +} diff --git a/lib.h b/lib.h new file mode 100644 index 0000000..287f9ec --- /dev/null +++ b/lib.h @@ -0,0 +1,8 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include + +void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key); diff --git a/setup.py b/setup.py deleted file mode 100644 index 5143b53..0000000 --- a/setup.py +++ /dev/null @@ -1,193 +0,0 @@ -import distutils.command.clean -import glob -import os -import shutil -import subprocess -import sys - -import torch -from setuptools import find_packages, setup -from torch.utils.cpp_extension import ( - BuildExtension, - CppExtension, - CUDA_HOME, - CUDAExtension, -) - -version = open("version.txt", "r").read().strip() -sha = "Unknown" -package_name = "torchcsprng" - -cwd = os.path.dirname(os.path.abspath(__file__)) - -try: - sha = ( - subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=cwd) - .decode("ascii") - .strip() - ) -except Exception: - pass - -if os.getenv("BUILD_VERSION"): - version = os.getenv("BUILD_VERSION") -elif sha != "Unknown": - version += "+" + sha[:7] -print("Building wheel {}-{}".format(package_name, version)) - - -def write_version_file(): - version_path = os.path.join(cwd, "torchcsprng", "version.py") - with open(version_path, "w") as f: - f.write("__version__ = '{}'\n".format(version)) - f.write("git_version = {}\n".format(repr(sha))) - # f.write("from torchcsprng.extension import _check_cuda_version\n") - # f.write("if 
_check_cuda_version() > 0:\n") - # f.write(" cuda = _check_cuda_version()\n") - - -write_version_file() - -with open("README.md", "r") as fh: - long_description = fh.read() - - -requirements = [ - "torch", -] - - -def append_flags(flags, flags_to_append): - for flag in flags_to_append: - if not flag in flags: - flags.append(flag) - return flags - - -def get_extensions(): - build_cuda = torch.cuda.is_available() or os.getenv("FORCE_CUDA", "0") == "1" - - module_name = "torchcsprng" - - extensions_dir = os.path.join(cwd, module_name, "csrc") - - openmp = "ATen parallel backend: OpenMP" in torch.__config__.parallel_info() - - main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) - source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) - - sources = main_file + source_cpu - extension = CppExtension - - define_macros = [] - - cxx_flags = os.getenv("CXX_FLAGS", "") - if cxx_flags == "": - cxx_flags = [] - else: - cxx_flags = cxx_flags.split(" ") - if openmp: - if sys.platform == "linux": - cxx_flags = append_flags(cxx_flags, ["-fopenmp"]) - elif sys.platform == "win32": - cxx_flags = append_flags(cxx_flags, ["/openmp"]) - # elif sys.platform == 'darwin': - # cxx_flags = append_flags(cxx_flags, ['-Xpreprocessor', '-fopenmp']) - - if build_cuda: - extension = CUDAExtension - source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) - sources += source_cuda - - define_macros += [("WITH_CUDA", None)] - - nvcc_flags = os.getenv("NVCC_FLAGS", "") - if nvcc_flags == "": - nvcc_flags = [] - else: - nvcc_flags = nvcc_flags.split(" ") - nvcc_flags = append_flags(nvcc_flags, ["--expt-extended-lambda", "-Xcompiler"]) - extra_compile_args = { - "cxx": cxx_flags, - "nvcc": nvcc_flags, - } - else: - extra_compile_args = { - "cxx": cxx_flags, - } - - ext_modules = [ - extension( - module_name + "._C", - sources, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - ) - ] - - return ext_modules - - -class 
clean(distutils.command.clean.clean): - def run(self): - with open(".gitignore", "r") as f: - ignores = f.read() - start_deleting = False - for wildcard in filter(None, ignores.split("\n")): - if ( - wildcard - == "# do not change or delete this comment - `python setup.py clean` deletes everything after this line" - ): - start_deleting = True - if not start_deleting: - continue - for filename in glob.glob(wildcard): - try: - os.remove(filename) - except OSError: - shutil.rmtree(filename, ignore_errors=True) - - # It's an old-style class in Python 2.7... - distutils.command.clean.clean.run(self) - - -setup( - # Metadata - name=package_name, - version=version, - author="Pavel Belevich", - author_email="pbelevich@fb.com", - url="https://github.com/pytorch/csprng", - description="Cryptographically secure pseudorandom number generators for PyTorch", - long_description=long_description, - long_description_content_type="text/markdown", - license="BSD-3", - # Package info - packages=find_packages(exclude=("test",)), - classifiers=[ - "Intended Audience :: Developers", - "Intended Audience :: Education", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Programming Language :: C++", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Mathematics", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries", - "Topic :: Software Development :: Libraries :: Python Modules", - ], - python_requires=">=3.6", - install_requires=requirements, - ext_modules=get_extensions(), - test_suite="test", - cmdclass={ - "build_ext": BuildExtension, - "clean": clean, - }, -) diff --git a/torchcsprng/block_cipher.h 
b/torchcsprng/block_cipher.h index aeae133..8ab8080 100644 --- a/torchcsprng/block_cipher.h +++ b/torchcsprng/block_cipher.h @@ -60,6 +60,8 @@ TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper(int64_t idx, cip int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { uint8_t block[block_size]; std::memset(&block, 0, block_size); // is it ok to use zeros as padding? + // In this application, we require users to pass in the input that is a multiple of block_size. + // So zero padding never actually happens and it is ok. if (input_ptr != nullptr) { copy_input_to_block(idx, block, block_size, input_ptr, input_numel, input_type_size, input_index_calc); } diff --git a/version.txt b/torchcsprng/version.txt similarity index 100% rename from version.txt rename to torchcsprng/version.txt From cca573341ea8d05ae9a81ccf5af85f50b45bd639 Mon Sep 17 00:00:00 2001 From: myl7 Date: Wed, 3 Jan 2024 20:44:36 +0800 Subject: [PATCH 04/10] Update build config Update gitignore. Update include in code. 
--- .gitignore | 36 ++++++++++++++++++++++++++++++++++++ CMakeLists.txt | 14 +++++++++++++- lib.cpp | 1 - lib.h | 2 +- 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 11d8384..f683121 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,39 @@ +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + # IDE .idea .vscode + +# Build +/build diff --git a/CMakeLists.txt b/CMakeLists.txt index b5a151c..04ea01f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,16 +1,28 @@ # Copyright (C) myl7 # SPDX-License-Identifier: BSD-3-Clause -cmake_minimum_required(VERSION 3.28) +cmake_minimum_required(VERSION 3.22) +# Set env `CUDACXX=/absolute/path/to/nvcc` to enable the CUDA language +# if nvcc is not in the PATH. project(fss-prg-cuda LANGUAGES CUDA CXX) set(CMAKE_CXX_STANDARD 17) include(CheckLanguage) check_language(CUDA) +# Pass `-DCMAKE_PREFIX_PATH=/absolute/path/to/libtorch` to cmake to locate LibTorch. +# If the package cmake config is confused with a soft link from /usr/local/cuda-* to /usr/local/cuda, +# pass `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-*` to cmake to locate CUDA. +# Pass `-DGPU_ARCHS=` to cmake to specify the CPU archtecture. +# You can check the value alternatively at https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ . +# This value set by LibTorch (version that supports CUDA 12.1) has arch that is not supported by CUDA 12.1 instead. 
find_package(Torch REQUIRED) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") +# Fix Python.h not found even though it is just located in /usr/include/python* +find_package(PythonLibs REQUIRED) +include_directories(${PYTHON_INCLUDE_DIRS}) + add_library( fssprgcuda SHARED lib.cpp diff --git a/lib.cpp b/lib.cpp index 9f0ec19..0daa1f2 100644 --- a/lib.cpp +++ b/lib.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: BSD-3-Clause #include "lib.h" - #include using torch::Tensor; diff --git a/lib.h b/lib.h index 287f9ec..b809e76 100644 --- a/lib.h +++ b/lib.h @@ -3,6 +3,6 @@ #pragma once -#include +#include void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key); From 83633aa33e51bca3398ddb01fd8b46f0b9535b14 Mon Sep 17 00:00:00 2001 From: myl7 Date: Thu, 4 Jan 2024 02:01:40 +0800 Subject: [PATCH 05/10] =?UTF-8?q?Impl=20Matyas=E2=80=93Meyer=E2=80=93Oseas?= =?UTF-8?q?=20inside=20kernel?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix cmake language detection. Remove unused code. Only init round key once. 
--- CMakeLists.txt | 16 +- lib.cpp | 1 - torchcsprng/THCIntegerDivider.cuh | 7 +- torchcsprng/{aes.h => aes.cu} | 36 +- torchcsprng/aes.cuh | 21 + .../{block_cipher.h => block_cipher.cuh} | 40 +- torchcsprng/csprng.cpp | 292 ------------ torchcsprng/kernels.cu | 433 +----------------- torchcsprng/kernels.cuh | 53 +-- ...{kernels_commons.h => kernels_commons.cuh} | 4 +- torchcsprng/{macros.h => macros.cuh} | 1 + torchcsprng/owcf.cu | 30 ++ torchcsprng/owcf.cuh | 17 + 13 files changed, 107 insertions(+), 844 deletions(-) rename torchcsprng/{aes.h => aes.cu} (92%) create mode 100644 torchcsprng/aes.cuh rename torchcsprng/{block_cipher.h => block_cipher.cuh} (74%) delete mode 100644 torchcsprng/csprng.cpp rename torchcsprng/{kernels_commons.h => kernels_commons.cuh} (98%) rename torchcsprng/{macros.h => macros.cuh} (94%) create mode 100644 torchcsprng/owcf.cu create mode 100644 torchcsprng/owcf.cuh diff --git a/CMakeLists.txt b/CMakeLists.txt index 04ea01f..1899a4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,15 +27,17 @@ add_library( fssprgcuda SHARED lib.cpp lib.h - torchcsprng/THCIntegerDivider.cuh - torchcsprng/aes.h - torchcsprng/kernels_commons.h + torchcsprng/kernels.cu torchcsprng/kernels.cuh - torchcsprng/csprng.cpp - torchcsprng/block_cipher.h - torchcsprng/macros.h + torchcsprng/macros.cuh + torchcsprng/kernels_commons.cuh + torchcsprng/block_cipher.cuh torchcsprng/OffsetCalculator.cuh - torchcsprng/kernels.cu + torchcsprng/THCIntegerDivider.cuh + torchcsprng/owcf.cu + torchcsprng/owcf.cuh + torchcsprng/aes.cu + torchcsprng/aes.cuh ) set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) target_link_libraries(fssprgcuda "${TORCH_LIBRARIES}") diff --git a/lib.cpp b/lib.cpp index 0daa1f2..dfb406e 100644 --- a/lib.cpp +++ b/lib.cpp @@ -23,5 +23,4 @@ void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint Tensor key_tensor = torch::from_blob(const_cast(key), {key_size_bytes}, 
torch::kUInt8).to(torch::kCUDA); encrypt(input, output, key_tensor, "aes128", "ecb"); - input ^= output; } diff --git a/torchcsprng/THCIntegerDivider.cuh b/torchcsprng/THCIntegerDivider.cuh index b7dfb6a..8e67945 100644 --- a/torchcsprng/THCIntegerDivider.cuh +++ b/torchcsprng/THCIntegerDivider.cuh @@ -8,9 +8,12 @@ #ifndef THC_INTEGER_DIVIDER_INC #define THC_INTEGER_DIVIDER_INC -#include -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) +#include +// #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) +#if defined(__CUDACC__) || defined(__HIPCC__) #include +#else +#error "CUDA not found" #endif // A utility class to implement integer division by multiplication, given a fixed diff --git a/torchcsprng/aes.h b/torchcsprng/aes.cu similarity index 92% rename from torchcsprng/aes.h rename to torchcsprng/aes.cu index 32041f3..38c5c28 100644 --- a/torchcsprng/aes.h +++ b/torchcsprng/aes.cu @@ -5,6 +5,8 @@ * LICENSE file in the root directory of this source tree. */ +#include "aes.cuh" + namespace aes { // This AES implementation is based on @@ -55,8 +57,6 @@ namespace aes { #define Nr 10 // The number of rounds in AES Cipher. #endif -constexpr size_t block_t_size = 16; - typedef uint8_t state_t[4][4]; // The lookup-tables are marked const so they can be placed in read-only storage instead of RAM @@ -102,8 +102,7 @@ TORCH_CSPRNG_CONSTANT const uint8_t Rcon[11] = {0x8d, 0x01, 0x02, 0x04, 0x08, 0x #define getSBoxInvert(num) (rsbox[(num)]) -// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. 
-TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t *RoundKey, const uint8_t *Key) { +void KeyExpansion(uint8_t *RoundKey, const uint8_t *Key) { unsigned int i, j, k; uint8_t tempa[4]; // Used for the column/row operations @@ -312,10 +311,7 @@ TORCH_CSPRNG_HOST_DEVICE void InvShiftRows(state_t *state) { (*state)[3][3] = temp; } -TORCH_CSPRNG_HOST_DEVICE void encrypt(uint8_t *state, const uint8_t *key) { - uint8_t RoundKey[176]; - KeyExpansion(RoundKey, key); - +TORCH_CSPRNG_HOST_DEVICE void encrypt_with_round_key(uint8_t *state, const uint8_t *RoundKey) { uint8_t round = 0; // Add the First round key to the state before starting the rounds. @@ -338,28 +334,4 @@ TORCH_CSPRNG_HOST_DEVICE void encrypt(uint8_t *state, const uint8_t *key) { AddRoundKey(Nr, (state_t *)state, RoundKey); } -TORCH_CSPRNG_HOST_DEVICE void decrypt(uint8_t *state, const uint8_t *key) { - uint8_t RoundKey[176]; - KeyExpansion(RoundKey, key); - - uint8_t round = 0; - - // Add the First round key to the state before starting the rounds. - AddRoundKey(Nr, (state_t *)state, RoundKey); - - // There will be Nr rounds. - // The first Nr-1 rounds are identical. - // These Nr rounds are executed in the loop below. 
- // Last one without InvMixColumn() - for (round = (Nr - 1);; --round) { - InvShiftRows((state_t *)state); - InvSubBytes((state_t *)state); - AddRoundKey(round, (state_t *)state, RoundKey); - if (round == 0) { - break; - } - InvMixColumns((state_t *)state); - } -} - } // namespace aes diff --git a/torchcsprng/aes.cuh b/torchcsprng/aes.cuh new file mode 100644 index 0000000..e3e2eab --- /dev/null +++ b/torchcsprng/aes.cuh @@ -0,0 +1,21 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include +#include +#include "macros.cuh" + +namespace aes { + +constexpr size_t block_t_size = 16; +constexpr size_t round_key_t_size = 176; + +TORCH_CSPRNG_HOST_DEVICE void encrypt_with_round_key(uint8_t *state, const uint8_t *RoundKey); +// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. +// +// In our usecase, not run on GPU since it is only run once. +void KeyExpansion(uint8_t *RoundKey, const uint8_t *Key); + +} // namespace aes diff --git a/torchcsprng/block_cipher.h b/torchcsprng/block_cipher.cuh similarity index 74% rename from torchcsprng/block_cipher.h rename to torchcsprng/block_cipher.cuh index 8ab8080..d36bf94 100644 --- a/torchcsprng/block_cipher.h +++ b/torchcsprng/block_cipher.cuh @@ -7,7 +7,7 @@ #pragma once -#include "macros.h" +#include "macros.cuh" #include #include #include "OffsetCalculator.cuh" @@ -18,11 +18,14 @@ #if defined(__CUDACC__) || defined(__HIPCC__) #include #include +#else +#error "CUDA not found" #endif #if defined(__CUDACC__) || defined(__HIPCC__) #define UNROLL_IF_CUDA #pragma unroll #else +#error "CUDA not found" #define UNROLL_IF_CUDA #endif @@ -81,36 +84,10 @@ __global__ static void block_cipher_kernel_cuda(cipher_t cipher, int output_elem block_cipher_kernel_helper(idx, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform); } +#else +#error 
"CUDA not found" #endif -template -static void block_cipher_kernel_cpu_serial(int64_t begin, int64_t end, cipher_t cipher, int output_elem_per_block, - void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, - int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { - for (auto idx = begin; idx < end; ++idx) { - block_cipher_kernel_helper(idx, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, - input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform); - } -} - -template -static void block_cipher_kernel_cpu(int64_t total, cipher_t cipher, int output_elem_per_block, void *input_ptr, - int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, int64_t output_numel, - int output_type_size, output_index_calc_t output_index_calc, transform_t transform_func) { - if (total < at::internal::GRAIN_SIZE || at::get_num_threads() == 1) { - block_cipher_kernel_cpu_serial(0, total, cipher, output_elem_per_block, input_ptr, input_numel, - input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform_func); - } else { - at::parallel_for(0, total, at::internal::GRAIN_SIZE, [&](int64_t begin, int64_t end) { - block_cipher_kernel_cpu_serial(begin, end, cipher, output_elem_per_block, input_ptr, input_numel, - input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, - transform_func); - }); - } -} - template void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, @@ -121,9 +98,7 @@ void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, inp } if (device.type() == at::kCPU) { - const auto total = (output_numel + output_elem_per_block - 1) / output_elem_per_block; - block_cipher_kernel_cpu(total, cipher, output_elem_per_block, 
input_ptr, input_numel, input_type_size, - input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform_func); + TORCH_CHECK(false, "torchcsprng was compiled with only CUDA support"); } else if (device.type() == at::kCUDA) { #if defined(__CUDACC__) || defined(__HIPCC__) const auto threads = 256; @@ -134,6 +109,7 @@ void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, inp transform_func); AT_CUDA_CHECK(cudaGetLastError()); #else +#error "CUDA not found" TORCH_CHECK(false, "torchcsprng was compiled without CUDA support"); #endif } else { diff --git a/torchcsprng/csprng.cpp b/torchcsprng/csprng.cpp deleted file mode 100644 index 26c527d..0000000 --- a/torchcsprng/csprng.cpp +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include - -#include -#include -#include - -#include "kernels_commons.h" -#include "kernels.cuh" - -using namespace at; -using namespace torch::csprng; - -static const auto GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE = "generator does not support tensor device type"; -static const auto TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED = "tensor device type is not supported"; - -// ==================================================== Random ======================================================== - -Tensor &random_(Tensor &self, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_(self, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor &random_from_to(Tensor &self, int64_t from, optional to, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_from_to(self, from, to, gen); - } else { - TORCH_CHECK(false, 
GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor &random_to(Tensor &self, int64_t to, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::random_to(self, to, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Uniform ======================================================= - -Tensor &uniform_(Tensor &self, double from, double to, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::uniform_(self, from, to, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Normal ======================================================== - -Tensor &normal_(Tensor &self, double mean, double std, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_(self, mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor &normal_Tensor_float_out(const Tensor &mean, double std, c10::optional gen, Tensor &output) { - if (output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_float_out(output, mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor &normal_float_Tensor_out(double mean, const Tensor &std, c10::optional gen, Tensor &output) { - if (output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_float_Tensor_out(output, mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor &normal_Tensor_Tensor_out(const Tensor &mean, const Tensor &std, c10::optional gen, Tensor &output) { - if (output.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_Tensor_out(output, mean, std, 
gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_Tensor_float(const Tensor &mean, double std, c10::optional gen) { - if (mean.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_float(mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_float_Tensor(double mean, const Tensor &std, c10::optional gen) { - if (std.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_float_Tensor(mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -Tensor normal_Tensor_Tensor(const Tensor &mean, const Tensor &std, c10::optional gen) { - if (mean.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::normal_Tensor_Tensor(mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ==================================================== Cauchy ======================================================== - -Tensor &cauchy_(Tensor &self, double median, double sigma, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::cauchy_(self, median, sigma, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== LogNormal ======================================================= - -Tensor &log_normal_(Tensor &self, double mean, double std, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::log_normal_(self, mean, std, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== Geometric ======================================================= - -Tensor &geometric_(Tensor &self, double p, c10::optional gen) { - if (self.device().type() == 
DeviceType::CUDA) { - return torch::csprng::cuda::geometric_(self, p, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// ================================================== Exponential ===================================================== - -Tensor &exponential_(Tensor &self, double lambda, c10::optional gen) { - if (self.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::exponential_(self, lambda, gen); - } else { - TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); - } -} - -// =============================================== Random permutation ================================================= - -// randperm implementation was copied from PyTorch to unblock CSPRNG users, but ultimately CSPRNG must reuse -// refactored randperm from PyTorch, see https://github.com/pytorch/pytorch/issues/43816 - -namespace { - -inline void check_supported_max_int_with_precision(int64_t n, const Tensor &tensor) { - TORCH_CHECK(at::scalar_tensor(n, tensor.options()).defined(), "n is too large for result tensor type: '", - tensor.toString(), "'"); - - // Ensure sufficient precision for floating point representation. 
- switch (tensor.scalar_type()) { - case at::ScalarType::Half: - TORCH_CHECK(n <= (int64_t(1) << 11) + 1, "n cannot be greater than 2049 for Half type."); - break; - case at::ScalarType::Float: - TORCH_CHECK(n <= (int64_t(1) << 24) + 1, "n cannot be greater than 2^24+1 for Float type."); - break; - case at::ScalarType::Double: // Unlikely to happen, but doesn't hurt to check - TORCH_CHECK(n <= (int64_t(1) << 53) + 1, "n cannot be greater than 2^53+1 for Double type."); - break; - default: - break; - } -} - -template -void randperm(Tensor &result, int64_t n, c10::optional generator) { - auto gen = at::check_generator(generator); - scalar_t *r__data = result.data_ptr(); - - result.resize_({n}); - int64_t r__stride_0 = result.stride(0); - - at::parallel_for(0, n, internal::GRAIN_SIZE, [&r__data, &r__stride_0](int64_t p_begin, int64_t p_end) { - for (int64_t i = p_begin; i < p_end; i++) r__data[i * r__stride_0] = static_cast(i); - }); - - for (int64_t i = 0; i < n - 1; i++) { - int64_t z = gen->random() % (n - i); - scalar_t sav = r__data[i * r__stride_0]; - r__data[i * r__stride_0] = r__data[(z + i) * r__stride_0]; - r__data[(z + i) * r__stride_0] = sav; - } -} -} // namespace - -Tensor &randperm_generator_out(int64_t n, c10::optional generator, Tensor &result) { - TORCH_CHECK(n >= 0, "n must be non-negative, got", n); - check_supported_max_int_with_precision(n, result); - if (result.device().type() == at::kCUDA) { - auto result_cpu = at::empty({n}, result.options().device(kCPU)); - randperm_generator_out(n, generator, result_cpu); - result.resize_({n}); - return result.copy_(result_cpu); - } - result.resize_({n}); - // See Note [Acquire lock when using random generators] - std::lock_guard lock(generator->mutex()); - AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, result.scalar_type(), "randperm", - [&]() -> void { randperm(result, n, generator); }); - return result; -} - -// 
================================================Encrypt/Decrypt===================================================== - -Tensor encrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { - if (input.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::encrypt(input, output, key, cipher, mode); - } else { - TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); - } -} - -Tensor decrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { - if (input.device().type() == DeviceType::CUDA) { - return torch::csprng::cuda::decrypt(input, output, key, cipher, mode); - } else { - TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); - } -} - -// ==================================================================================================================== - -Generator create_random_device_generator(c10::optional token = c10::nullopt) { - if (token.has_value()) { - return make_generator(*token); - } else { - return make_generator(true); - } -} - -Generator create_mt19937_generator(c10::optional seed = c10::nullopt) { - if (seed.has_value()) { - return make_generator(*seed); - } else { - return make_generator(false); - } -} - -bool supports_cuda() { -#ifdef WITH_CUDA - return true; -#else - return false; -#endif -} - -TORCH_LIBRARY_IMPL(aten, CustomRNGKeyId, m) { - // Random - m.impl("random_.from", random_from_to); - m.impl("random_.to", random_to); - m.impl("random_", random_); - // Uniform - m.impl("uniform_", uniform_); - // Normal - m.impl("normal_", normal_); - m.impl("normal.Tensor_float_out", normal_Tensor_float_out); - m.impl("normal.float_Tensor_out", normal_float_Tensor_out); - m.impl("normal.Tensor_Tensor_out", normal_Tensor_Tensor_out); - m.impl("normal.Tensor_float", normal_Tensor_float); - m.impl("normal.float_Tensor", normal_float_Tensor); - m.impl("normal.Tensor_Tensor", normal_Tensor_Tensor); - // Cauchy - m.impl("cauchy_", cauchy_); - 
// LogNormal - m.impl("log_normal_", log_normal_); - // Geometric - m.impl("geometric_", geometric_); - // Exponential - m.impl("exponential_", exponential_); - // Random permutation - m.impl("randperm.generator_out", randperm_generator_out); -} diff --git a/torchcsprng/kernels.cu b/torchcsprng/kernels.cu index 65bb01f..f55153d 100644 --- a/torchcsprng/kernels.cu +++ b/torchcsprng/kernels.cu @@ -5,7 +5,9 @@ * LICENSE file in the root directory of this source tree. */ -#include "kernels_commons.h" +#include "kernels_commons.cuh" +#include "aes.cuh" +#include "owcf.cuh" namespace torch { namespace csprng { @@ -20,382 +22,6 @@ namespace cuda { * LICENSE file in the root directory of this source tree. */ -#include "aes.h" - -// Generates `block_t_size`-bytes random key Tensor on CPU -// using `generator`, which must be an instance of `at::CPUGeneratorImpl` -// and passes it to the `device`. -template -at::Tensor key_tensor(size_t block_t_size, c10::optional generator) { - std::lock_guard lock(generator->mutex()); - auto gen = at::check_generator(generator); - auto key = torch::empty({static_cast(block_t_size)}, torch::kUInt8); - using random_t = typename std::result_of::type; - constexpr size_t random_t_size = sizeof(random_t); - for (size_t i = 0; i < block_t_size / random_t_size; i++) { - const auto rand = gen->random(); - for (size_t j = 0; j < random_t_size; j++) { - size_t k = i * random_t_size + j; - key[k] = static_cast((rand >> (j * 8)) & 0xff); - } - } - return key; -} - -template -at::Tensor aes128_key_tensor(at::Generator generator) { - return key_tensor(aes::block_t_size, generator); -} - -// ==================================================================================================================== - -// A simple container for random state sub-blocks that implements RNG interface -// with random() and random64() methods, that are used by transformation function -template -struct RNGValues { - TORCH_CSPRNG_HOST_DEVICE RNGValues(uint64_t *vals) { - 
memcpy(&vals_, vals, size * sizeof(uint64_t)); - } - uint32_t TORCH_CSPRNG_HOST_DEVICE random() { - auto res = static_cast(vals_[index]); - index++; - return res; - } - uint64_t TORCH_CSPRNG_HOST_DEVICE random64() { - auto res = vals_[index]; - index++; - return res; - } - - private: - uint64_t vals_[size]; - int index = 0; -}; - -// Applies AES in CTR mode with the `key` for passed TensorIterator iter. -// `scalar_t` is a scalar type equivalent of target tensor dtype -// `uint_t` is an unsigned integral type of sub-blocks that random state is divided to -// (e.g, 16 bytes random state block can be divided into 16 uint8_t sub-blocks -// or 8 uint16_t sub-block or 4 uint32_t sub-block or 2 uint64_t sub-blocks) -// `N` is a number of sub-block which is used by `transform_func` -// to generate a random value of specific distribution (e.g. `normal` uses 2) -// `key` is a CUDA pointer to random key memory block -// `transform_func` is a callable that converts N `uint_t` random state sub-blocks passed in RNGValues into target dtype -// `scalar_t` -template -void aes_helper(at::TensorIterator &iter, const uint8_t *key_bytes, transform_t transform_func) { - auto output = iter.tensor(0); - const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); - const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE( - uint32_t li) -> uint32_t { return output_offset_calc.get(li)[0]; }; - torch::csprng::block_cipher( - nullptr, 0, 0, output_index_calc, output.data_ptr(), output.numel(), output.element_size(), output_index_calc, - iter.device_type(), - [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { - uint8_t idx_block[aes::block_t_size]; - std::memset(&idx_block, 0, aes::block_t_size); - *(reinterpret_cast(idx_block)) = idx; - aes::encrypt(idx_block, key_bytes); - for (size_t i = 0; i < aes::block_t_size; i++) { - block[i] ^= idx_block[i]; - } - }, - aes::block_t_size / (N * sizeof(uint_t)), - 
[transform_func] TORCH_CSPRNG_HOST_DEVICE(uint8_t * block) { - const auto n = aes::block_t_size / (N * sizeof(uint_t)); - for (size_t i = 0; i < n; ++i) { - uint64_t vals[N]; - for (size_t j = 0; j < N; ++j) { - vals[j] = (reinterpret_cast(block))[N * i + j]; - } - RNGValues rng(vals); - reinterpret_cast(block)[i] = transform_func(&rng); - } - }); -} - -// ==================================================================================================================== - -// A mapping between scalar type and corresponding unsigned integer type of random state sub-block. -// uint64_t for double and long, uint32_t for the rest -template -struct UIntType {}; - -template <> -struct UIntType { - using type = uint64_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; -template <> -struct UIntType { - using type = uint16_t; -}; -template <> -struct UIntType { - using type = uint16_t; -}; -template <> -struct UIntType { - using type = uint64_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; -template <> -struct UIntType { - using type = uint32_t; -}; - -// ==================================================== Random ======================================================== - -template -struct RandomKernel { - void operator()(TensorIterator &iter, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3( - at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_kernel", [&] { - aes_helper::type>( - iter, key, [] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { - uniform_int_distribution random; - return random(generator); - }); - }); - } -}; - -template -void random_from_to_kernel_helper(TensorIterator 
&iter, uint64_t range, int64_t base, const uint8_t *key) { - aes_helper(iter, key, [range, base] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { - uniform_int_from_to_distribution random(range, base); - return random(generator); - }); -} - -template -void random_full_range_kernel_helper(TensorIterator &iter, const uint8_t *key) { - aes_helper(iter, key, [] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { - uniform_int_full_range_distribution random; - return random(generator); - }); -} - -template -struct RandomFromToKernel { - void operator()(TensorIterator &iter, uint64_t range, int64_t base, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3( - at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_from_to_kernel", [&] { - if ((std::is_same::value || std::is_same::value || - std::is_same::value || - std::is_same::value) /* TODO: && range >= 1ULL << 32*/) { - random_from_to_kernel_helper(iter, range, base, key); - } else { - random_from_to_kernel_helper(iter, range, base, key); - } - }); - } - void operator()(TensorIterator &iter, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), - "random_full_64_bits_range_kernel", [&] { - if (std::is_same::value || std::is_same::value || - std::is_same::value || std::is_same::value) { - random_full_range_kernel_helper(iter, key); - } else { - TORCH_CHECK(false, "random_full_64_bits_range_kernel_cuda handles only int64, double, float and bfloat16"); - } - }); - } -}; - -at::Tensor &random_(at::Tensor &self, c10::optional generator) { - return at::native::templates::random_impl(self, generator); -} - -at::Tensor &random_from_to( - at::Tensor &self, 
int64_t from, c10::optional to, c10::optional generator) { - return at::native::templates::random_from_to_impl(self, from, to, generator); -} - -at::Tensor &random_to(at::Tensor &self, int64_t to, c10::optional generator) { - return random_from_to(self, 0, to, generator); -} - -// ==================================================== Uniform ======================================================= - -template -struct UniformKernel { - void operator()(TensorIterator &iter, double from, double to, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2( - at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "uniform_kernel", [&] { - aes_helper( - iter, key, [from, to] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * generator) -> scalar_t { - uniform_real_distribution uniform(from, to); - return static_cast(uniform(generator)); - }); - }); - } -}; - -at::Tensor &uniform_(at::Tensor &self, double from, double to, c10::optional generator) { - return at::native::templates::uniform_impl_(self, from, to, generator); -} - -// ==================================================== Normal ======================================================== - -template -struct NormalKernel { - void operator()(Tensor &self, double mean, double std, c10::optional generator) { - auto iter = TensorIterator::nullary_op(self); - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "normal_kernel", [&] { - aes_helper( - iter, key, [mean, std] TORCH_CSPRNG_HOST_DEVICE(RNGValues<2> * gen) -> scalar_t { - normal_distribution normal(mean, std); - return static_cast(normal(gen)); - }); - }); - } -}; - -at::Tensor &normal_(at::Tensor &self, double mean, double std, c10::optional generator) { - return at::native::templates::normal_impl_(self, 
mean, std, generator); -} - -at::Tensor &normal_Tensor_float_out( - at::Tensor &output, const at::Tensor &mean, double std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor &normal_float_Tensor_out( - at::Tensor &output, double mean, const at::Tensor &std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor &normal_Tensor_Tensor_out( - at::Tensor &output, const at::Tensor &mean, const at::Tensor &std, c10::optional gen) { - return at::native::templates::normal_out_impl(output, mean, std, gen); -} - -at::Tensor normal_Tensor_float(const at::Tensor &mean, double std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -at::Tensor normal_float_Tensor(double mean, const at::Tensor &std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -at::Tensor normal_Tensor_Tensor(const at::Tensor &mean, const at::Tensor &std, c10::optional gen) { - return at::native::templates::normal_impl(mean, std, gen); -} - -// ==================================================== Cauchy ======================================================== - -template -struct CauchyKernel { - void operator()(TensorIterator &iter, double median, double sigma, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "cauchy_kernel", [&] { - aes_helper( - iter, key, [median, sigma] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { - cauchy_distribution cauchy(median, sigma); - return static_cast(cauchy(gen)); - }); - }); - } -}; - -at::Tensor &cauchy_(at::Tensor &self, double median, double sigma, c10::optional generator) { - return at::native::templates::cauchy_impl_(self, median, sigma, generator); -} - -// 
================================================== LogNormal ======================================================= - -template -struct LogNormalKernel { - void operator()(TensorIterator &iter, double mean, double std, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "log_normal", [&] { - aes_helper( - iter, key, [mean, std] TORCH_CSPRNG_HOST_DEVICE(RNGValues<2> * gen) -> scalar_t { - lognormal_distribution logNormal(mean, std); - return static_cast(logNormal(gen)); - }); - }); - } -}; - -at::Tensor &log_normal_(at::Tensor &self, double mean, double std, c10::optional gen) { - return at::native::templates::log_normal_impl_(self, mean, std, gen); -} - -// ================================================== Geometric ======================================================= - -template -struct GeometricKernel { - void operator()(TensorIterator &iter, double p, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES_AND2( - at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "geometric_kernel", [&] { - aes_helper::type, 1>( - iter, key, [p] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { - geometric_distribution geometric(p); - return geometric(gen); - }); - }); - } -}; - -at::Tensor &geometric_(at::Tensor &self, double p, c10::optional gen) { - return at::native::templates::geometric_impl_(self, p, gen); -} - -// ================================================== Exponential ===================================================== - -template -struct ExponentialKernel { - void operator()(TensorIterator &iter, double lambda, c10::optional generator) { - const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); - const auto key = key_t.data_ptr(); - 
AT_DISPATCH_FLOATING_TYPES_AND2( - at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "exponential_kernel", [&] { - aes_helper(iter, key, [lambda] TORCH_CSPRNG_HOST_DEVICE(RNGValues<1> * gen) -> scalar_t { - exponential_distribution exponential(lambda); - return static_cast(exponential(gen)); - }); - }); - } -}; - -at::Tensor &exponential_(at::Tensor &self, double lambda, c10::optional gen) { - return at::native::templates::exponential_impl_(self, lambda, gen); -} - // ================================================Encrypt/Decrypt===================================================== void check_cipher(const std::string &cipher, Tensor key) { @@ -407,32 +33,14 @@ void check_cipher(const std::string &cipher, Tensor key) { } void aes_ecb_encrypt(Tensor input, Tensor output, uint8_t *key_bytes) { - block_cipher(input, output, - [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { aes::encrypt(block, key_bytes); }); -} - -void aes_ecb_decrypt(Tensor input, Tensor output, uint8_t *key_bytes) { - block_cipher(input, output, - [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { aes::decrypt(block, key_bytes); }); -} - -void aes_ctr_encrypt(Tensor input, Tensor output, uint8_t *key_bytes) { + uint8_t round_key[aes::round_key_t_size]; + aes::KeyExpansion(round_key, key_bytes); block_cipher( - input, output, [key_bytes] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { - uint8_t idx_block[aes::block_t_size]; - std::memset(&idx_block, 0, aes::block_t_size); - *(reinterpret_cast(idx_block)) = idx; - aes::encrypt(idx_block, key_bytes); - for (size_t i = 0; i < aes::block_t_size; i++) { - block[i] ^= idx_block[i]; - } + input, output, [round_key] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { + owcf::matyas_meyer_oseas(block, round_key); }); } -void aes_ctr_decrypt(Tensor input, Tensor output, uint8_t *key_bytes) { - aes_ctr_encrypt(input, output, key_bytes); -} - Tensor encrypt(Tensor 
input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), "input, output and key tensors must have the same device"); @@ -446,33 +54,8 @@ Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &ciphe const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); if (mode == "ecb") { aes_ecb_encrypt(input, output, key_bytes); - } else if (mode == "ctr") { - aes_ctr_encrypt(input, output, key_bytes); - } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); - } - return output; -} - -Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { - TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), - "input, output and key tensors must have the same device"); - const auto output_size_bytes = output.numel() * output.itemsize(); - const auto input_size_bytes = input.numel() * input.itemsize(); - const auto diff = input_size_bytes - output_size_bytes; - TORCH_CHECK(0 <= diff && diff < aes::block_t_size, - "output tensor size in bytes must be less then or equal to input tensor size in bytes, the difference must be less " - "than block size"); - TORCH_CHECK(input_size_bytes % aes::block_t_size == 0, - "input tensor size in bytes must divisible by cipher block size in bytes"); - check_cipher(cipher, key); - const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); - if (mode == "ecb") { - aes_ecb_decrypt(input, output, key_bytes); - } else if (mode == "ctr") { - aes_ctr_decrypt(input, output, key_bytes); } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); + TORCH_CHECK(false, "only supports \"ecb\" mode, \"", mode, "\" is not supported."); } return output; } diff --git a/torchcsprng/kernels.cuh b/torchcsprng/kernels.cuh 
index 4467e11..1c0ae97 100644 --- a/torchcsprng/kernels.cuh +++ b/torchcsprng/kernels.cuh @@ -23,59 +23,10 @@ namespace cuda { * LICENSE file in the root directory of this source tree. */ -// ==================================================== Random ======================================================== - -at::Tensor &random_(at::Tensor &self, c10::optional generator); - -at::Tensor &random_from_to( - at::Tensor &self, int64_t from, optional to, c10::optional generator); - -at::Tensor &random_to(at::Tensor &self, int64_t to, c10::optional generator); - -// ==================================================== Uniform ======================================================= - -at::Tensor &uniform_(at::Tensor &self, double from, double to, c10::optional generator); - -// ==================================================== Normal ======================================================== - -at::Tensor &normal_(at::Tensor &self, double mean, double std, c10::optional generator); - -at::Tensor &normal_Tensor_float_out( - at::Tensor &output, const at::Tensor &mean, double std, c10::optional gen); - -at::Tensor &normal_float_Tensor_out( - at::Tensor &output, double mean, const at::Tensor &std, c10::optional gen); - -at::Tensor &normal_Tensor_Tensor_out( - at::Tensor &output, const at::Tensor &mean, const at::Tensor &std, c10::optional gen); - -at::Tensor normal_Tensor_float(const at::Tensor &mean, double std, c10::optional gen); - -at::Tensor normal_float_Tensor(double mean, const at::Tensor &std, c10::optional gen); - -at::Tensor normal_Tensor_Tensor(const at::Tensor &mean, const at::Tensor &std, c10::optional gen); - -// ==================================================== Cauchy ======================================================== - -at::Tensor &cauchy_(at::Tensor &self, double median, double sigma, c10::optional generator); - -// ================================================== LogNormal ======================================================= - -at::Tensor 
&log_normal_(at::Tensor &self, double mean, double std, c10::optional gen); - -// ================================================== Geometric ======================================================= - -at::Tensor &geometric_(at::Tensor &self, double p, c10::optional gen); - -// ================================================== Exponential ===================================================== - -at::Tensor &exponential_(at::Tensor &self, double lambda, c10::optional gen); - // ================================================Encrypt/Decrypt===================================================== -Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); - -Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); +at::Tensor encrypt( + at::Tensor input, at::Tensor output, at::Tensor key, const std::string &cipher, const std::string &mode); // The original kernels_body.inc ends here diff --git a/torchcsprng/kernels_commons.h b/torchcsprng/kernels_commons.cuh similarity index 98% rename from torchcsprng/kernels_commons.h rename to torchcsprng/kernels_commons.cuh index 359ba2b..a5714a3 100644 --- a/torchcsprng/kernels_commons.h +++ b/torchcsprng/kernels_commons.cuh @@ -13,8 +13,8 @@ #include #include #include -#include "macros.h" -#include "block_cipher.h" +#include "macros.cuh" +#include "block_cipher.cuh" inline uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) { return (static_cast(hi) << 32) | lo; diff --git a/torchcsprng/macros.h b/torchcsprng/macros.cuh similarity index 94% rename from torchcsprng/macros.h rename to torchcsprng/macros.cuh index d98d1bd..ccc64ed 100644 --- a/torchcsprng/macros.h +++ b/torchcsprng/macros.cuh @@ -11,6 +11,7 @@ #define TORCH_CSPRNG_HOST_DEVICE __host__ __device__ #define TORCH_CSPRNG_CONSTANT __constant__ #else +#error "CUDA not found" #define TORCH_CSPRNG_HOST_DEVICE #define TORCH_CSPRNG_CONSTANT #endif diff --git 
a/torchcsprng/owcf.cu b/torchcsprng/owcf.cu new file mode 100644 index 0000000..1c5ecda --- /dev/null +++ b/torchcsprng/owcf.cu @@ -0,0 +1,30 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +#include "owcf.cuh" +#include "aes.cuh" + +namespace owcf { + +TORCH_CSPRNG_HOST_DEVICE inline void memcpy_block(uint32_t *dest, const uint32_t *src) { + dest[0] = src[0]; + dest[1] = src[1]; + dest[2] = src[2]; + dest[3] = src[3]; +} + +TORCH_CSPRNG_HOST_DEVICE inline void xor_block(uint32_t *dest, const uint32_t *src) { + dest[0] ^= src[0]; + dest[1] ^= src[1]; + dest[2] ^= src[2]; + dest[3] ^= src[3]; +} + +TORCH_CSPRNG_HOST_DEVICE void matyas_meyer_oseas(uint8_t *state, const uint8_t *round_key) { + uint32_t input[aes::block_t_size / sizeof(uint32_t)]; + memcpy_block(input, reinterpret_cast(state)); + aes::encrypt_with_round_key(state, round_key); + xor_block(reinterpret_cast(state), input); +} + +} // namespace owcf diff --git a/torchcsprng/owcf.cuh b/torchcsprng/owcf.cuh new file mode 100644 index 0000000..8f88d96 --- /dev/null +++ b/torchcsprng/owcf.cuh @@ -0,0 +1,17 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +//! 
One-way compression functions + +#pragma once + +#include +#include +#include "macros.cuh" + +namespace owcf { + +/// Matyas–Meyer–Oseas based on AES128 +TORCH_CSPRNG_HOST_DEVICE void matyas_meyer_oseas(uint8_t *state, const uint8_t *round_key); + +} // namespace owcf From a2fd7600de2d15c0a7a8f1e68ddc40e8c1004cbd Mon Sep 17 00:00:00 2001 From: myl7 Date: Mon, 22 Jan 2024 10:10:51 +0800 Subject: [PATCH 06/10] Remove part of pytorch dep Update cmake config to exclude headers --- CMakeLists.txt | 9 ---- lib.cpp | 11 ++--- torchcsprng/block_cipher.cuh | 24 +++------- torchcsprng/kernels.cu | 46 +++++++++---------- torchcsprng/kernels.cuh | 6 +-- torchcsprng/kernels_commons.cuh | 78 --------------------------------- 6 files changed, 34 insertions(+), 140 deletions(-) delete mode 100644 torchcsprng/kernels_commons.cuh diff --git a/CMakeLists.txt b/CMakeLists.txt index 1899a4b..854c104 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,18 +26,9 @@ include_directories(${PYTHON_INCLUDE_DIRS}) add_library( fssprgcuda SHARED lib.cpp - lib.h torchcsprng/kernels.cu - torchcsprng/kernels.cuh - torchcsprng/macros.cuh - torchcsprng/kernels_commons.cuh - torchcsprng/block_cipher.cuh - torchcsprng/OffsetCalculator.cuh - torchcsprng/THCIntegerDivider.cuh torchcsprng/owcf.cu - torchcsprng/owcf.cuh torchcsprng/aes.cu - torchcsprng/aes.cuh ) set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) target_link_libraries(fssprgcuda "${TORCH_LIBRARIES}") diff --git a/lib.cpp b/lib.cpp index dfb406e..dfca417 100644 --- a/lib.cpp +++ b/lib.cpp @@ -6,21 +6,18 @@ using torch::Tensor; -extern Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode); +extern Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher); constexpr size_t block_t_size = 16; -void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key) { +void csprng_matyas_meyer_oseas_aes128(uint8_t 
*buf, size_t buf_size, const uint8_t *key, size_t key_size) { const auto input_size_bytes = buf_size; TORCH_CHECK(input_size_bytes % block_t_size == 0, "input size in bytes(", input_size_bytes, ") is not a multiple of block size(", block_t_size, ")"); - Tensor input = torch::from_blob(buf, {input_size_bytes}, torch::kUInt8).to(torch::kCUDA); - - const auto output_size_bytes = input_size_bytes; - Tensor output = torch::empty({output_size_bytes}, torch::kUInt8); + Tensor input = torch::from_blob(buf, {static_cast(input_size_bytes)}, torch::kUInt8).to(torch::kCUDA); const auto key_size_bytes = 16; Tensor key_tensor = torch::from_blob(const_cast(key), {key_size_bytes}, torch::kUInt8).to(torch::kCUDA); - encrypt(input, output, key_tensor, "aes128", "ecb"); + const auto output = encrypt(input, key, key_size, "aes128"); } diff --git a/torchcsprng/block_cipher.cuh b/torchcsprng/block_cipher.cuh index d36bf94..5e0f27c 100644 --- a/torchcsprng/block_cipher.cuh +++ b/torchcsprng/block_cipher.cuh @@ -62,7 +62,8 @@ TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper(int64_t idx, cip void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { uint8_t block[block_size]; - std::memset(&block, 0, block_size); // is it ok to use zeros as padding? + // std::memset(&block, 0, block_size); // is it ok to use zeros as padding? + // No need to pad because we ensure `input_size_bytes % block_t_size == 0` previously in lib.cpp. // In this application, we require users to pass in the input that is a multiple of block_size. // So zero padding never actually happens and it is ok. 
if (input_ptr != nullptr) { @@ -118,7 +119,7 @@ void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, inp } template -void block_cipher(at::Tensor input, at::Tensor output, cipher_t cipher) { +void block_cipher(at::Tensor input, cipher_t cipher) { const auto input_ptr = input.data_ptr(); const auto input_numel = input.numel(); @@ -132,23 +133,10 @@ void block_cipher(at::Tensor input, at::Tensor output, cipher_t cipher) { const auto input_index_calc = [input_offset_calc] TORCH_CSPRNG_HOST_DEVICE( uint32_t li) -> uint32_t { return input_offset_calc.get(li)[0]; }; - const auto output_ptr = output.data_ptr(); - const auto output_numel = output.numel(); + const auto device = input.device(); - // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero - if (output_ptr == nullptr || output_numel == 0) { - return; - } - - const auto output_type_size = output.element_size(); - const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); - const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE( - uint32_t li) -> uint32_t { return output_offset_calc.get(li)[0]; }; - - const auto device = output.device(); - - torch::csprng::block_cipher(input_ptr, input_numel, input_type_size, input_index_calc, output_ptr, - output_numel, output_type_size, output_index_calc, device, cipher, block_size / output_type_size, + torch::csprng::block_cipher(input_ptr, input_numel, input_type_size, input_index_calc, input_ptr, + input_numel, input_type_size, input_index_calc, device, cipher, block_size / input_type_size, [] TORCH_CSPRNG_HOST_DEVICE(uint8_t * x) {}); } diff --git a/torchcsprng/kernels.cu b/torchcsprng/kernels.cu index f55153d..6f1567f 100644 --- a/torchcsprng/kernels.cu +++ b/torchcsprng/kernels.cu @@ -5,10 +5,14 @@ * LICENSE file in the root directory of this source tree. 
*/ -#include "kernels_commons.cuh" +#include "kernels.cuh" +#include +#include "block_cipher.cuh" #include "aes.cuh" #include "owcf.cuh" +using at::Tensor; + namespace torch { namespace csprng { namespace cuda { @@ -24,40 +28,32 @@ namespace cuda { // ================================================Encrypt/Decrypt===================================================== -void check_cipher(const std::string &cipher, Tensor key) { +void check_cipher(const std::string &cipher, size_t key_size) { if (cipher == "aes128") { - TORCH_CHECK(key.element_size() * key.numel() == 16, "key tensor must have 16 bytes(128 bits)"); + TORCH_CHECK(key_size == 16, "key tensor must have 16 bytes(128 bits)"); } else { TORCH_CHECK(false, "encrypt/decrypt supports \"aes128\" cipher, \"", cipher, "\" is not supported."); } } -void aes_ecb_encrypt(Tensor input, Tensor output, uint8_t *key_bytes) { +void aes_ecb_encrypt(Tensor input, const uint8_t *key_bytes) { uint8_t round_key[aes::round_key_t_size]; aes::KeyExpansion(round_key, key_bytes); - block_cipher( - input, output, [round_key] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { - owcf::matyas_meyer_oseas(block, round_key); - }); + block_cipher(input, [round_key] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { + owcf::matyas_meyer_oseas(block, round_key); + }); } -Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string &cipher, const std::string &mode) { - TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), - "input, output and key tensors must have the same device"); - const auto output_size_bytes = output.numel() * output.itemsize(); - const auto input_size_bytes = input.numel() * input.itemsize(); - const auto input_size_bytes_rounded = - (input_size_bytes + aes::block_t_size - 1) / aes::block_t_size * aes::block_t_size; - TORCH_CHECK(output_size_bytes == input_size_bytes_rounded, "output size in bytes(", output_size_bytes, - ") is not equal to input size in 
bytes rounded to block size(", input_size_bytes_rounded, ")"); - check_cipher(cipher, key); - const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); - if (mode == "ecb") { - aes_ecb_encrypt(input, output, key_bytes); - } else { - TORCH_CHECK(false, "only supports \"ecb\" mode, \"", mode, "\" is not supported."); - } - return output; +Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher) { + const auto input_size_bytes = buf.numel() * buf.itemsize(); + // const auto input_size_bytes_rounded = + // (input_size_bytes + aes::block_t_size - 1) / aes::block_t_size * aes::block_t_size; + // TORCH_CHECK(output_size_bytes == input_size_bytes_rounded, "output size in bytes(", output_size_bytes, + // ") is not equal to input size in bytes rounded to block size(", input_size_bytes_rounded, ")"); + // No need to check because we ensure `input_size_bytes % block_t_size == 0` previously in lib.cpp. + check_cipher(cipher, key_size); + aes_ecb_encrypt(buf, key); + return buf; } // The original kernels_body.inc ends here diff --git a/torchcsprng/kernels.cuh b/torchcsprng/kernels.cuh index 1c0ae97..6dab64f 100644 --- a/torchcsprng/kernels.cuh +++ b/torchcsprng/kernels.cuh @@ -7,9 +7,10 @@ #pragma once -#include #include +using at::Tensor; + namespace torch { namespace csprng { namespace cuda { @@ -25,8 +26,7 @@ namespace cuda { // ================================================Encrypt/Decrypt===================================================== -at::Tensor encrypt( - at::Tensor input, at::Tensor output, at::Tensor key, const std::string &cipher, const std::string &mode); +Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher); // The original kernels_body.inc ends here diff --git a/torchcsprng/kernels_commons.cuh b/torchcsprng/kernels_commons.cuh deleted file mode 100644 index a5714a3..0000000 --- a/torchcsprng/kernels_commons.cuh +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) Meta 
Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include "macros.cuh" -#include "block_cipher.cuh" - -inline uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) { - return (static_cast(hi) << 32) | lo; -} - -// CUDA CSPRNG is actually CPU generator which is used only to generate a random key on CPU for AES running in a block -// mode on CUDA -struct CSPRNGGeneratorImpl : public c10::GeneratorImpl { - CSPRNGGeneratorImpl(bool use_rd) - : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, - use_rd_{use_rd} {} - CSPRNGGeneratorImpl(const std::string &token) - : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, - use_rd_{true}, - rd_{token} {} - CSPRNGGeneratorImpl(uint64_t seed) - : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, - use_rd_{false}, - mt_{static_cast(seed)} {} - ~CSPRNGGeneratorImpl() = default; - uint32_t random() { - return use_rd_ ? rd_() : mt_(); - } - uint64_t random64() { - return use_rd_ ? 
make64BitsFrom32Bits(rd_(), rd_()) : make64BitsFrom32Bits(mt_(), mt_()); - } - - void set_current_seed(uint64_t seed) override { - throw std::runtime_error("not implemented"); - } - uint64_t current_seed() const override { - throw std::runtime_error("not implemented"); - } - uint64_t seed() override { - throw std::runtime_error("not implemented"); - } - CSPRNGGeneratorImpl *clone_impl() const override { - throw std::runtime_error("not implemented"); - } - - static at::DeviceType device_type() { - return at::DeviceType::CPU; - } - - void set_state(const c10::TensorImpl &new_state) override { - throw std::runtime_error("not implemented"); - } - c10::intrusive_ptr get_state() const override { - throw std::runtime_error("not implemented"); - } - - void set_offset(uint64_t offset) override { - throw std::runtime_error("not implemented"); - } - uint64_t get_offset() const override { - throw std::runtime_error("not implenented"); - } - bool use_rd_; - std::random_device rd_; - std::mt19937 mt_; -}; From 6089f1c0b4572705649cd99e76811490a7b9357c Mon Sep 17 00:00:00 2001 From: myl7 Date: Mon, 22 Jan 2024 10:49:41 +0800 Subject: [PATCH 07/10] Rearrange cpp cmake package --- CMakeLists.txt | 11 ++++++----- include/fssprgcuda.h | 12 ++++++++++++ lib.h | 8 -------- lib.cpp => src/fssprgcuda.cpp | 12 ++++++++---- .../torchcsprng}/OffsetCalculator.cuh | 0 .../torchcsprng}/THCIntegerDivider.cuh | 0 {torchcsprng => src/torchcsprng}/aes.cu | 0 {torchcsprng => src/torchcsprng}/aes.cuh | 0 {torchcsprng => src/torchcsprng}/block_cipher.cuh | 0 {torchcsprng => src/torchcsprng}/kernels.cu | 0 {torchcsprng => src/torchcsprng}/kernels.cuh | 0 {torchcsprng => src/torchcsprng}/macros.cuh | 0 {torchcsprng => src/torchcsprng}/owcf.cu | 0 {torchcsprng => src/torchcsprng}/owcf.cuh | 0 {torchcsprng => src/torchcsprng}/version.txt | 0 15 files changed, 26 insertions(+), 17 deletions(-) create mode 100644 include/fssprgcuda.h delete mode 100644 lib.h rename lib.cpp => src/fssprgcuda.cpp (73%) rename 
{torchcsprng => src/torchcsprng}/OffsetCalculator.cuh (100%) rename {torchcsprng => src/torchcsprng}/THCIntegerDivider.cuh (100%) rename {torchcsprng => src/torchcsprng}/aes.cu (100%) rename {torchcsprng => src/torchcsprng}/aes.cuh (100%) rename {torchcsprng => src/torchcsprng}/block_cipher.cuh (100%) rename {torchcsprng => src/torchcsprng}/kernels.cu (100%) rename {torchcsprng => src/torchcsprng}/kernels.cuh (100%) rename {torchcsprng => src/torchcsprng}/macros.cuh (100%) rename {torchcsprng => src/torchcsprng}/owcf.cu (100%) rename {torchcsprng => src/torchcsprng}/owcf.cuh (100%) rename {torchcsprng => src/torchcsprng}/version.txt (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 854c104..e29a6e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,11 +25,12 @@ include_directories(${PYTHON_INCLUDE_DIRS}) add_library( fssprgcuda SHARED - lib.cpp - torchcsprng/kernels.cu - torchcsprng/owcf.cu - torchcsprng/aes.cu + src/fssprgcuda.cpp + src/torchcsprng/kernels.cu + src/torchcsprng/owcf.cu + src/torchcsprng/aes.cu ) set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -target_link_libraries(fssprgcuda "${TORCH_LIBRARIES}") +target_include_directories(fssprgcuda PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") +target_link_libraries(fssprgcuda PRIVATE "${TORCH_LIBRARIES}") target_compile_options(fssprgcuda PRIVATE $<$: --expt-extended-lambda>) diff --git a/include/fssprgcuda.h b/include/fssprgcuda.h new file mode 100644 index 0000000..5907d1d --- /dev/null +++ b/include/fssprgcuda.h @@ -0,0 +1,12 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include + +namespace fssprgcuda { + +void matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key); + +} // namespace fssprgcuda diff --git a/lib.h b/lib.h deleted file mode 100644 index b809e76..0000000 --- a/lib.h +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright (C) myl7 -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - 
-void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key); diff --git a/lib.cpp b/src/fssprgcuda.cpp similarity index 73% rename from lib.cpp rename to src/fssprgcuda.cpp index dfca417..4836b3e 100644 --- a/lib.cpp +++ b/src/fssprgcuda.cpp @@ -1,16 +1,18 @@ // Copyright (C) myl7 // SPDX-License-Identifier: BSD-3-Clause -#include "lib.h" +#include "fssprgcuda.h" #include +#include "torchcsprng/kernels.cuh" using torch::Tensor; - -extern Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher); +using torch::csprng::cuda::encrypt; constexpr size_t block_t_size = 16; -void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { +namespace fssprgcuda { + +void matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { const auto input_size_bytes = buf_size; TORCH_CHECK(input_size_bytes % block_t_size == 0, "input size in bytes(", input_size_bytes, ") is not a multiple of block size(", block_t_size, ")"); @@ -21,3 +23,5 @@ void csprng_matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8 const auto output = encrypt(input, key, key_size, "aes128"); } + +} // namespace fssprgcuda diff --git a/torchcsprng/OffsetCalculator.cuh b/src/torchcsprng/OffsetCalculator.cuh similarity index 100% rename from torchcsprng/OffsetCalculator.cuh rename to src/torchcsprng/OffsetCalculator.cuh diff --git a/torchcsprng/THCIntegerDivider.cuh b/src/torchcsprng/THCIntegerDivider.cuh similarity index 100% rename from torchcsprng/THCIntegerDivider.cuh rename to src/torchcsprng/THCIntegerDivider.cuh diff --git a/torchcsprng/aes.cu b/src/torchcsprng/aes.cu similarity index 100% rename from torchcsprng/aes.cu rename to src/torchcsprng/aes.cu diff --git a/torchcsprng/aes.cuh b/src/torchcsprng/aes.cuh similarity index 100% rename from torchcsprng/aes.cuh rename to src/torchcsprng/aes.cuh diff --git a/torchcsprng/block_cipher.cuh 
b/src/torchcsprng/block_cipher.cuh similarity index 100% rename from torchcsprng/block_cipher.cuh rename to src/torchcsprng/block_cipher.cuh diff --git a/torchcsprng/kernels.cu b/src/torchcsprng/kernels.cu similarity index 100% rename from torchcsprng/kernels.cu rename to src/torchcsprng/kernels.cu diff --git a/torchcsprng/kernels.cuh b/src/torchcsprng/kernels.cuh similarity index 100% rename from torchcsprng/kernels.cuh rename to src/torchcsprng/kernels.cuh diff --git a/torchcsprng/macros.cuh b/src/torchcsprng/macros.cuh similarity index 100% rename from torchcsprng/macros.cuh rename to src/torchcsprng/macros.cuh diff --git a/torchcsprng/owcf.cu b/src/torchcsprng/owcf.cu similarity index 100% rename from torchcsprng/owcf.cu rename to src/torchcsprng/owcf.cu diff --git a/torchcsprng/owcf.cuh b/src/torchcsprng/owcf.cuh similarity index 100% rename from torchcsprng/owcf.cuh rename to src/torchcsprng/owcf.cuh diff --git a/torchcsprng/version.txt b/src/torchcsprng/version.txt similarity index 100% rename from torchcsprng/version.txt rename to src/torchcsprng/version.txt From b666ab1e0faadbcc534bb9508342a0174aa25215 Mon Sep 17 00:00:00 2001 From: myl7 Date: Fri, 12 Apr 2024 12:13:07 +0800 Subject: [PATCH 08/10] Drop dep torch --- CMakeLists.txt | 24 ++---- src/fssprgcuda.cpp | 14 +--- src/torchcsprng/OffsetCalculator.cuh | 108 -------------------------- src/torchcsprng/block_cipher.cuh | 110 +++++++++------------------ src/torchcsprng/kernels.cu | 30 +++----- src/torchcsprng/kernels.cuh | 8 +- 6 files changed, 57 insertions(+), 237 deletions(-) delete mode 100644 src/torchcsprng/OffsetCalculator.cuh diff --git a/CMakeLists.txt b/CMakeLists.txt index e29a6e4..02b5ace 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,30 +1,17 @@ -# Copyright (C) myl7 -# SPDX-License-Identifier: BSD-3-Clause - +# For Ubuntu Jammy 22.04 so far cmake_minimum_required(VERSION 3.22) # Set env `CUDACXX=/absolute/path/to/nvcc` to enable the CUDA language # if nvcc is not in the PATH. 
project(fss-prg-cuda LANGUAGES CUDA CXX) set(CMAKE_CXX_STANDARD 17) +# If the builder is confused by a soft link from /usr/local/cuda-* to /usr/local/cuda, +# pass `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-*` to CMake to locate CUDA. include(CheckLanguage) check_language(CUDA) -# Pass `-DCMAKE_PREFIX_PATH=/absolute/path/to/libtorch` to cmake to locate LibTorch. -# If the package cmake config is confused with a soft link from /usr/local/cuda-* to /usr/local/cuda, -# pass `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-*` to cmake to locate CUDA. -# Pass `-DGPU_ARCHS=` to cmake to specify the CPU archtecture. -# You can check the value alternatively at https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ . -# This value set by LibTorch (version that supports CUDA 12.1) has arch that is not supported by CUDA 12.1 instead. -find_package(Torch REQUIRED) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") - -# Fix Python.h not found even though it is just located in /usr/include/python* -find_package(PythonLibs REQUIRED) -include_directories(${PYTHON_INCLUDE_DIRS}) - add_library( - fssprgcuda SHARED + fssprgcuda src/fssprgcuda.cpp src/torchcsprng/kernels.cu src/torchcsprng/owcf.cu @@ -32,5 +19,4 @@ add_library( ) set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) target_include_directories(fssprgcuda PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") -target_link_libraries(fssprgcuda PRIVATE "${TORCH_LIBRARIES}") -target_compile_options(fssprgcuda PRIVATE $<$: --expt-extended-lambda>) +target_compile_options(fssprgcuda PRIVATE $<$:--extended-lambda>) diff --git a/src/fssprgcuda.cpp b/src/fssprgcuda.cpp index 4836b3e..192c8fd 100644 --- a/src/fssprgcuda.cpp +++ b/src/fssprgcuda.cpp @@ -2,26 +2,16 @@ // SPDX-License-Identifier: BSD-3-Clause #include "fssprgcuda.h" -#include #include "torchcsprng/kernels.cuh" -using torch::Tensor; using torch::csprng::cuda::encrypt; constexpr size_t block_t_size = 16; namespace 
fssprgcuda { -void matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { - const auto input_size_bytes = buf_size; - TORCH_CHECK(input_size_bytes % block_t_size == 0, "input size in bytes(", input_size_bytes, - ") is not a multiple of block size(", block_t_size, ")"); - Tensor input = torch::from_blob(buf, {static_cast(input_size_bytes)}, torch::kUInt8).to(torch::kCUDA); - - const auto key_size_bytes = 16; - Tensor key_tensor = torch::from_blob(const_cast(key), {key_size_bytes}, torch::kUInt8).to(torch::kCUDA); - - const auto output = encrypt(input, key, key_size, "aes128"); +int matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { + return encrypt(buf, buf_size, key, key_size, "aes128"); } } // namespace fssprgcuda diff --git a/src/torchcsprng/OffsetCalculator.cuh b/src/torchcsprng/OffsetCalculator.cuh deleted file mode 100644 index 15fcd0e..0000000 --- a/src/torchcsprng/OffsetCalculator.cuh +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include "THCIntegerDivider.cuh" - -/// OffsetCalculator calculates the offset in bytes of a linear index for NARGS -/// operands that share the same shape, but may have different strides. - -#ifdef __HIP_PLATFORM_HCC__ -constexpr int MAX_DIMS = 16; -#else -constexpr int MAX_DIMS = 25; -#endif - -template -struct OffsetCalculator { - // The offset for each argument. Wrapper around fixed-size array. - // On CUDA, zero sized array is not allowed, so when we are handling nullary - // operators, we need to create a size 1 offset to avoid compiler failure. - // This size 1 offset is just a placeholder, and we will not use it. 
- using offset_type = at::detail::Array(NARGS, 1)>; - - // if element_sizes is nullptr, then the strides will be in bytes, otherwise - // the strides will be in # of elements. - OffsetCalculator( - int dims, const int64_t *sizes, const int64_t *const *strides, const int64_t *element_sizes = nullptr) - : dims(dims) { - TORCH_CHECK(dims <= MAX_DIMS, "tensor has too many (>", MAX_DIMS, ") dims"); - for (int i = 0; i < MAX_DIMS; ++i) { - if (i < dims) { - sizes_[i] = IntDivider(sizes[i]); - } else { - sizes_[i] = IntDivider(1); - } - for (int arg = 0; arg < NARGS; arg++) { - int64_t element_size = (element_sizes == nullptr ? 1LL : element_sizes[arg]); - strides_[i][arg] = i < dims ? strides[arg][i] / element_size : 0; - } - } - } - - C10_HOST_DEVICE offset_type get(index_t linear_idx) const { - offset_type offsets; -#pragma unroll - for (int arg = 0; arg < NARGS; arg++) { - offsets[arg] = 0; - } - -#pragma unroll - for (int dim = 0; dim < MAX_DIMS; ++dim) { - if (dim == dims) { - break; - } - auto divmod = sizes_[dim].divmod(linear_idx); - linear_idx = divmod.div; - -#pragma unroll - for (int arg = 0; arg < NARGS; arg++) { - offsets[arg] += divmod.mod * strides_[dim][arg]; - } - } - return offsets; - } - - int dims; - IntDivider sizes_[MAX_DIMS]; - index_t strides_[MAX_DIMS][std::max(NARGS, 1)]; -}; - -template -struct TrivialOffsetCalculator { - // The offset for each argument. Wrapper around fixed-size array. - // The offsets are in # of elements, not in bytes. - // On CUDA, zero sized array is not allowed, so when we are handling nullary - // operators, we need to create a size 1 offset to avoid compiler failure. - // This size 1 offset is just a placeholder, and we will not use it. 
- using offset_type = at::detail::Array(NARGS, 1)>; - - C10_HOST_DEVICE offset_type get(index_t linear_idx) const { - offset_type offsets; -#pragma unroll - for (int arg = 0; arg < NARGS; arg++) { - offsets[arg] = linear_idx; - } - return offsets; - } -}; - -template -static OffsetCalculator make_offset_calculator(const at::TensorIterator &iter) { - AT_ASSERT(N <= iter.ntensors()); - std::array strides; - for (int i = 0; i < N; i++) { - strides[i] = iter.strides(i).data(); - } - return OffsetCalculator(iter.ndim(), iter.shape().data(), strides.data()); -} diff --git a/src/torchcsprng/block_cipher.cuh b/src/torchcsprng/block_cipher.cuh index 5e0f27c..8c922e3 100644 --- a/src/torchcsprng/block_cipher.cuh +++ b/src/torchcsprng/block_cipher.cuh @@ -8,136 +8,98 @@ #pragma once #include "macros.cuh" -#include -#include -#include "OffsetCalculator.cuh" -#include #include -#include - -#if defined(__CUDACC__) || defined(__HIPCC__) -#include -#include -#else -#error "CUDA not found" -#endif - -#if defined(__CUDACC__) || defined(__HIPCC__) -#define UNROLL_IF_CUDA #pragma unroll -#else -#error "CUDA not found" -#define UNROLL_IF_CUDA -#endif +#include +#include namespace torch { namespace csprng { -template -TORCH_CSPRNG_HOST_DEVICE static void copy_input_to_block(int64_t idx, uint8_t *block, int block_size, void *input_ptr, - int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc) { +TORCH_CSPRNG_HOST_DEVICE static void copy_input_to_block( + int64_t idx, uint8_t *block, int block_size, void *input_ptr, int64_t input_numel, int input_type_size) { for (auto i = 0; i < block_size / input_type_size; ++i) { const auto linear_index = idx * (block_size / input_type_size) + i; if (linear_index < input_numel) { - std::memcpy(block + i * input_type_size, - &(reinterpret_cast(input_ptr)[input_index_calc(linear_index)]), input_type_size); + std::memcpy( + block + i * input_type_size, &(reinterpret_cast(input_ptr)[linear_index]), input_type_size); } } } -template 
TORCH_CSPRNG_HOST_DEVICE static void copy_block_to_output(int64_t idx, uint8_t *block, int output_elem_per_block, - void *output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc) { + void *output_ptr, int64_t output_numel, int output_type_size) { for (auto i = 0; i < output_elem_per_block; ++i) { const auto linear_index = idx * output_elem_per_block + i; if (linear_index < output_numel) { - std::memcpy(&(reinterpret_cast(output_ptr)[output_index_calc(linear_index)]), - block + i * output_type_size, output_type_size); + std::memcpy( + &(reinterpret_cast(output_ptr)[linear_index]), block + i * output_type_size, output_type_size); } } } -template +template TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper(int64_t idx, cipher_t cipher, int output_elem_per_block, - void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, - int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { + void *input_ptr, int64_t input_numel, int input_type_size, void *output_ptr, int64_t output_numel, + int output_type_size, transform_t transform) { uint8_t block[block_size]; // std::memset(&block, 0, block_size); // is it ok to use zeros as padding? // No need to pad because we ensure `input_size_bytes % block_t_size == 0` previously in lib.cpp. // In this application, we require users to pass in the input that is a multiple of block_size. // So zero padding never actually happens and it is ok. 
if (input_ptr != nullptr) { - copy_input_to_block(idx, block, block_size, input_ptr, input_numel, input_type_size, input_index_calc); + copy_input_to_block(idx, block, block_size, input_ptr, input_numel, input_type_size); } cipher(idx, block); transform(block); - copy_block_to_output( - idx, block, output_elem_per_block, output_ptr, output_numel, output_type_size, output_index_calc); + copy_block_to_output(idx, block, output_elem_per_block, output_ptr, output_numel, output_type_size); } #if defined(__CUDACC__) || defined(__HIPCC__) -template +template __global__ static void block_cipher_kernel_cuda(cipher_t cipher, int output_elem_per_block, void *input_ptr, - int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, void *output_ptr, int64_t output_numel, - int output_type_size, output_index_calc_t output_index_calc, transform_t transform) { + int64_t input_numel, int input_type_size, void *output_ptr, int64_t output_numel, int output_type_size, + transform_t transform) { const auto idx = blockIdx.x * blockDim.x + threadIdx.x; block_cipher_kernel_helper(idx, cipher, output_elem_per_block, input_ptr, input_numel, input_type_size, - input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, transform); + output_ptr, output_numel, output_type_size, transform); } #else #error "CUDA not found" #endif -template -void block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, - void *output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, - at::Device device, cipher_t cipher, int output_elem_per_block, transform_t transform_func) { +template +int block_cipher(void *input_ptr, int64_t input_numel, int input_type_size, void *output_ptr, int64_t output_numel, + int output_type_size, cipher_t cipher, int output_elem_per_block, transform_t transform_func) { if (output_ptr == nullptr || output_numel == 0) { - return; + return -1; } - if 
(device.type() == at::kCPU) { - TORCH_CHECK(false, "torchcsprng was compiled with only CUDA support"); - } else if (device.type() == at::kCUDA) { #if defined(__CUDACC__) || defined(__HIPCC__) - const auto threads = 256; - const auto grid = (output_numel + (threads * output_elem_per_block) - 1) / (threads * output_elem_per_block); - auto stream = at::cuda::getCurrentCUDAStream(); - block_cipher_kernel_cuda<<>>(cipher, output_elem_per_block, input_ptr, - input_numel, input_type_size, input_index_calc, output_ptr, output_numel, output_type_size, output_index_calc, - transform_func); - AT_CUDA_CHECK(cudaGetLastError()); + const auto threads = 256; + const auto grid = (output_numel + (threads * output_elem_per_block) - 1) / (threads * output_elem_per_block); + block_cipher_kernel_cuda<<>>(cipher, output_elem_per_block, input_ptr, input_numel, + input_type_size, output_ptr, output_numel, output_type_size, transform_func); + return cudaGetLastError(); #else #error "CUDA not found" - TORCH_CHECK(false, "torchcsprng was compiled without CUDA support"); #endif - } else { - TORCH_CHECK(false, "block_cipher supports only CPU and CUDA devices"); - } } template -void block_cipher(at::Tensor input, cipher_t cipher) { - const auto input_ptr = input.data_ptr(); - const auto input_numel = input.numel(); +int block_cipher(uint8_t *buf, size_t buf_size, cipher_t cipher) { + // We have ensured `buf_size % 16 == 0` in front + const auto input_ptr = reinterpret_cast(buf); + const auto input_numel = buf_size / 4; - // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero + // Otherwise IntDivider crashes with integer division by zero if (input_ptr == nullptr || input_numel == 0) { - return; + return -1; } - const auto input_type_size = input.element_size(); - const auto input_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(input)); - const auto input_index_calc = [input_offset_calc] TORCH_CSPRNG_HOST_DEVICE( - uint32_t li) -> uint32_t { return 
input_offset_calc.get(li)[0]; }; - - const auto device = input.device(); + const auto input_type_size = 4; - torch::csprng::block_cipher(input_ptr, input_numel, input_type_size, input_index_calc, input_ptr, - input_numel, input_type_size, input_index_calc, device, cipher, block_size / input_type_size, - [] TORCH_CSPRNG_HOST_DEVICE(uint8_t * x) {}); + return block_cipher(input_ptr, input_numel, input_type_size, input_ptr, input_numel, input_type_size, + cipher, block_size / input_type_size, [] TORCH_CSPRNG_HOST_DEVICE(uint8_t * x) {}); } } // namespace csprng diff --git a/src/torchcsprng/kernels.cu b/src/torchcsprng/kernels.cu index 6f1567f..704b5a3 100644 --- a/src/torchcsprng/kernels.cu +++ b/src/torchcsprng/kernels.cu @@ -5,14 +5,12 @@ * LICENSE file in the root directory of this source tree. */ +#include #include "kernels.cuh" -#include #include "block_cipher.cuh" #include "aes.cuh" #include "owcf.cuh" -using at::Tensor; - namespace torch { namespace csprng { namespace cuda { @@ -30,34 +28,26 @@ namespace cuda { void check_cipher(const std::string &cipher, size_t key_size) { if (cipher == "aes128") { - TORCH_CHECK(key_size == 16, "key tensor must have 16 bytes(128 bits)"); + assert((void("key tensor must have 16 bytes(128 bits)"), key_size == 16)); } else { - TORCH_CHECK(false, "encrypt/decrypt supports \"aes128\" cipher, \"", cipher, "\" is not supported."); + assert((void("encrypt/decrypt only supports 'aes128' cipher"), false)); } } -void aes_ecb_encrypt(Tensor input, const uint8_t *key_bytes) { +int aes_ecb_encrypt(uint8_t *buf, size_t buf_size, const uint8_t *key_bytes) { uint8_t round_key[aes::round_key_t_size]; aes::KeyExpansion(round_key, key_bytes); - block_cipher(input, [round_key] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { - owcf::matyas_meyer_oseas(block, round_key); - }); + return block_cipher( + buf, buf_size, [round_key] TORCH_CSPRNG_HOST_DEVICE(int64_t idx, uint8_t * block) -> void { + owcf::matyas_meyer_oseas(block, 
round_key); + }); } -Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher) { - const auto input_size_bytes = buf.numel() * buf.itemsize(); - // const auto input_size_bytes_rounded = - // (input_size_bytes + aes::block_t_size - 1) / aes::block_t_size * aes::block_t_size; - // TORCH_CHECK(output_size_bytes == input_size_bytes_rounded, "output size in bytes(", output_size_bytes, - // ") is not equal to input size in bytes rounded to block size(", input_size_bytes_rounded, ")"); - // No need to check because we ensure `input_size_bytes % block_t_size == 0` previously in lib.cpp. +int encrypt(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size, const std::string &cipher) { check_cipher(cipher, key_size); - aes_ecb_encrypt(buf, key); - return buf; + return aes_ecb_encrypt(buf, buf_size, key); } -// The original kernels_body.inc ends here - } // namespace cuda } // namespace csprng } // namespace torch diff --git a/src/torchcsprng/kernels.cuh b/src/torchcsprng/kernels.cuh index 6dab64f..d04bb01 100644 --- a/src/torchcsprng/kernels.cuh +++ b/src/torchcsprng/kernels.cuh @@ -7,9 +7,9 @@ #pragma once -#include - -using at::Tensor; +#include +#include +#include namespace torch { namespace csprng { @@ -26,7 +26,7 @@ namespace cuda { // ================================================Encrypt/Decrypt===================================================== -Tensor encrypt(Tensor buf, const uint8_t *key, size_t key_size, const std::string &cipher); +int encrypt(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size, const std::string &cipher); // The original kernels_body.inc ends here From edd809db9d486e2db77fdc37064e2df5f6a1eed6 Mon Sep 17 00:00:00 2001 From: myl7 Date: Fri, 12 Apr 2024 12:37:44 +0800 Subject: [PATCH 09/10] Update exported names --- CMakeLists.txt | 2 +- include/fssprgcuda.h | 3 ++- src/fssprgcuda.cpp | 10 ++++------ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 02b5ace..e7e828f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,6 @@ cmake_minimum_required(VERSION 3.22) # Set env `CUDACXX=/absolute/path/to/nvcc` to enable the CUDA language # if nvcc is not in the PATH. project(fss-prg-cuda LANGUAGES CUDA CXX) -set(CMAKE_CXX_STANDARD 17) # If the builder is confused by a soft link from /usr/local/cuda-* to /usr/local/cuda, # pass `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-*` to CMake to locate CUDA. @@ -17,6 +16,7 @@ add_library( src/torchcsprng/owcf.cu src/torchcsprng/aes.cu ) +target_compile_features(fssprgcuda PUBLIC cxx_std_17) set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) target_include_directories(fssprgcuda PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") target_compile_options(fssprgcuda PRIVATE $<$:--extended-lambda>) diff --git a/include/fssprgcuda.h b/include/fssprgcuda.h index 5907d1d..2a6ed56 100644 --- a/include/fssprgcuda.h +++ b/include/fssprgcuda.h @@ -3,10 +3,11 @@ #pragma once +#include #include namespace fssprgcuda { -void matyas_meyer_oseas_aes128(uint8_t *buf, int64_t buf_size, const uint8_t *key); +int Aes128MatyasMeyerOseas(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size); } // namespace fssprgcuda diff --git a/src/fssprgcuda.cpp b/src/fssprgcuda.cpp index 192c8fd..dd4a0b8 100644 --- a/src/fssprgcuda.cpp +++ b/src/fssprgcuda.cpp @@ -1,16 +1,14 @@ // Copyright (C) myl7 // SPDX-License-Identifier: BSD-3-Clause -#include "fssprgcuda.h" +#include #include "torchcsprng/kernels.cuh" -using torch::csprng::cuda::encrypt; - -constexpr size_t block_t_size = 16; - namespace fssprgcuda { -int matyas_meyer_oseas_aes128(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { +using torch::csprng::cuda::encrypt; + +int Aes128MatyasMeyerOseas(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size) { return encrypt(buf, buf_size, key, key_size, "aes128"); } From 10350c046e060e6f976891804ae635112ab9a1f0 Mon Sep 17 
00:00:00 2001 From: myl7 Date: Fri, 12 Apr 2024 20:12:01 +0800 Subject: [PATCH 10/10] Link C++ and CUDA with Rust Going to fix the test --- .gitignore | 8 +++- CMakeLists.txt | 2 +- Cargo.lock | 93 ++++++++++++++++++++++++++++++++++++++ Cargo.toml | 15 ++++++ build.rs | 10 ++++ src/lib.rs | 62 +++++++++++++++++++++++++ src/torchcsprng/kernels.cu | 4 +- 7 files changed, 190 insertions(+), 4 deletions(-) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 build.rs create mode 100644 src/lib.rs diff --git a/.gitignore b/.gitignore index f683121..4f18e5e 100644 --- a/.gitignore +++ b/.gitignore @@ -35,5 +35,9 @@ .idea .vscode -# Build -/build +# Build dir +build +cmake-build-* + +# cargo +/target diff --git a/CMakeLists.txt b/CMakeLists.txt index e7e828f..8f01b9f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,6 @@ add_library( src/torchcsprng/aes.cu ) target_compile_features(fssprgcuda PUBLIC cxx_std_17) -set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +set_target_properties(fssprgcuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON) target_include_directories(fssprgcuda PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") target_compile_options(fssprgcuda PRIVATE $<$:--extended-lambda>) diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..a078128 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,93 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + +[[package]] +name = "cpufeatures" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "fss-prg-cuda" +version = "0.1.0" +dependencies = [ + "aes", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "generic-array", +] + +[[package]] +name = "libc" +version = "0.2.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = 
"typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..959074e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "fss-prg-cuda" +version = "0.1.0" +edition = "2021" +authors = ["myl7 "] +# description = "" +license = "BSD-3-Clause" +homepage = "https://github.com/myl7/fss-prg-cuda" +# documentation = "https://docs.rs/fss-prg-cuda" +repository = "https://github.com/myl7/fss-prg-cuda.git" +keywords = ["crypto", "fss", "prg", "cuda"] +categories = ["cryptography"] + +[dev-dependencies] +aes = "0.8.4" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..eb5f1b7 --- /dev/null +++ b/build.rs @@ -0,0 +1,10 @@ +fn main() { + println!("cargo:rustc-link-search={}", "build"); + println!("cargo:rustc-link-search={}", "/usr/local/cuda/lib64"); + + println!("cargo:rustc-link-lib=static={}", "fssprgcuda"); + println!("cargo:rerun-if-changed={}", "build/libfssprgcuda.a"); + + println!("cargo:rustc-link-lib=dylib={}", "stdc++"); + println!("cargo:rustc-link-lib=dylib={}", "cudart"); +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..6571ca8 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,62 @@ +// Copyright (C) myl7 +// SPDX-License-Identifier: BSD-3-Clause + +pub mod ffi { + use std::ffi::c_int; + + extern "C" { + #[link_name = "_ZN10fssprgcuda22Aes128MatyasMeyerOseasEPhmPKhm"] + pub fn aes128_matyas_meyer_oseas( + buf: *mut u8, + buf_size: usize, + key: *const u8, + key_size: usize, + ) -> c_int; + } +} + +pub fn aes128_matyas_meyer_oseas(buf: &mut [u8], key: &[u8]) -> i32 { + unsafe { + ffi::aes128_matyas_meyer_oseas(buf.as_mut_ptr(), 
buf.len(), key.as_ptr(), key.len()) as i32 + } +} + +#[cfg(test)] +mod tests { + use aes::cipher::generic_array::GenericArray; + use aes::cipher::{BlockEncrypt, KeyInit}; + use aes::Aes128; + + use super::*; + + fn xor_inplace(lhs: &mut [u8], rhs: &[u8]) { + lhs.iter_mut().zip(rhs.iter()).for_each(|(lb, rb)| { + *lb ^= rb; + }); + } + + fn aes128_matyas_meyer_oseas_alt(buf: &mut [u8], key: &[u8]) { + assert_eq!(buf.len(), key.len()); + assert_eq!(buf.len() % 16, 0); + (0..buf.len() / 16).for_each(|i| { + let key_block = GenericArray::from_slice(&key[i * 16..(i + 1) * 16]); + let cipher = Aes128::new(key_block); + let in_block = GenericArray::from_slice(&mut buf[i * 16..(i + 1) * 16]); + let mut out_block = GenericArray::default(); + cipher.encrypt_block_b2b(in_block, &mut out_block); + xor_inplace(&mut buf[i * 16..(i + 1) * 16], &out_block); + }); + } + + const BUF: &[u8] = b"g\xf1U\xf4\xc3-k\x8b\xb8\xcdA\x0c\xebQE\x97@\xb5\xf9\xca\x9278\xca\xb9\x82\xc1\xa1IR\x1d$\x92\x7fE\x18\xbd\t<(\xa5\x99[\x84\x95\x07L\x06'`\x0cU\xde\xb3\x0e\xa3\xfd`|\x96\xf5?\xe9\x04"; + const KEY: &[u8] = b"\xf0>\xc0\x8c\x1d|8m\x13oOm\xd4\xd46\x13\xfdk\x99\xa6\x10\xe8yj\xf1\x96\xc4\x9b\xc2jZ\xbf\xe8\xb1\x8ab\xe9n\x02\x07\xc6\xb6\xd7M\xc3[5\x13\xa5`\xef?\xc8| \xff\x16\xc0\xeaO&\xc5n\x9a"; + + #[test] + fn test_aes128_matyas_meyer_oseas() { + let mut buf = BUF.to_owned(); + aes128_matyas_meyer_oseas(&mut buf, KEY); + let mut buf_alt = BUF.to_owned(); + aes128_matyas_meyer_oseas_alt(&mut buf_alt, KEY); + assert_eq!(buf, buf_alt); + } +} diff --git a/src/torchcsprng/kernels.cu b/src/torchcsprng/kernels.cu index 704b5a3..c3ce3fd 100644 --- a/src/torchcsprng/kernels.cu +++ b/src/torchcsprng/kernels.cu @@ -28,7 +28,8 @@ namespace cuda { void check_cipher(const std::string &cipher, size_t key_size) { if (cipher == "aes128") { - assert((void("key tensor must have 16 bytes(128 bits)"), key_size == 16)); + // TODO: Different check + // assert((void("key tensor must have 16 bytes(128 bits)"), key_size == 
16)); } else { assert((void("encrypt/decrypt only supports 'aes128' cipher"), false)); } @@ -44,6 +45,7 @@ int aes_ecb_encrypt(uint8_t *buf, size_t buf_size, const uint8_t *key_bytes) { } int encrypt(uint8_t *buf, size_t buf_size, const uint8_t *key, size_t key_size, const std::string &cipher) { + // TODO: More checks check_cipher(cipher, key_size); return aes_ecb_encrypt(buf, buf_size, key); }