You can deploy a deep learning VM with an NVIDIA RAG workload using a pgvector PostgreSQL database managed by VMware Data Services Manager.

For information about the NVIDIA RAG workload, see the NVIDIA RAG Applications Docker Compose documentation (requires specific account permissions).

Prerequisites

Procedure

  1. If, as a data scientist, you are deploying the deep learning VM by using a catalog item in VMware Aria Automation, you provide the details of the pgvector PostgreSQL database after you deploy the virtual machine.
    1. Deploy a RAG workstation with a Vector Database by Using a Self-Service Catalog Item in VMware Aria Automation.
  2. If, as a DevOps engineer, you are deploying the deep learning VM for a data scientist directly on the vSphere cluster or by using the kubectl command, create a cloud-init script and deploy the deep learning VM.
    1. Create a cloud-init script for NVIDIA RAG and the pgvector PostgreSQL database you have created.
      You can modify the initial version of the cloud-init script for NVIDIA RAG. The following example is for NVIDIA RAG 24.08 and a pgvector PostgreSQL database with the connection details postgres://pgvector_db_admin:encoded_pgvector_db_admin_password@pgvector_db_ip_address:5432/pgvector_db_name.
      #cloud-config
      write_files:
      - path: /opt/dlvm/dl_app.sh
        permissions: '0755'
        content: |
          #!/bin/bash
          set -eu
          source /opt/dlvm/utils.sh
          trap 'error_exit "Unexpected error occurred in the DL workload"' ERR
          set_proxy "http" "https"
          
          sudo mkdir -p /opt/data/
          sudo chown vmware:vmware /opt/data
          sudo chmod -R 775 /opt/data
          cd /opt/data/
      
          cat <<EOF > /opt/data/config.json
          {
            "_comment_1": "This provides default support for RAG v24.08: llama3-8b-instruct model",
            "_comment_2": "Update llm_ms_gpu_id: specifies the GPU device ID to make available to the inference server when using multiple GPU",
            "_comment_3": "Update embedding_ms_gpu_id: specifies the GPU ID used for embedding model processing when using multiple GPU",
            "rag": {
              "org_name": "nvidia",
              "org_team_name": "aiworkflows",
              "rag_name": "ai-chatbot-docker-workflow",
              "rag_version": "24.08",
              "rag_app": "rag-app-multiturn-chatbot",
              "nim_model_profile": "auto",
              "llm_ms_gpu_id": "0",
              "embedding_ms_gpu_id": "0",
              "model_directory": "model-cache",
              "ngc_cli_version": "3.41.2"
            }
          }
          EOF
      
          CONFIG_JSON=$(cat "/opt/data/config.json")
          required_vars=("ORG_NAME" "ORG_TEAM_NAME" "RAG_NAME" "RAG_VERSION" "RAG_APP" "NIM_MODEL_PROFILE" "LLM_MS_GPU_ID" "EMBEDDING_MS_GPU_ID" "MODEL_DIRECTORY" "NGC_CLI_VERSION")
      
          # Extract rag values from /opt/data/config.json
          for index in "${!required_vars[@]}"; do
            key="${required_vars[$index]}"
            jq_query=".rag.${key,,} | select (.!=null)"
            value=$(echo "${CONFIG_JSON}" | jq -r "${jq_query}")
            if [[ -z "${value}" ]]; then 
              error_exit "${key} is required but not set."
            else
              eval ${key}=\""${value}"\"
            fi
          done
      
          # Read parameters from config-json to connect RAG to the DSM PGVector database
          CONFIG_JSON_BASE64=$(grep 'config-json' /opt/dlvm/ovf-env.xml | sed -n 's/.*oe:value="\([^"]*\).*/\1/p')
          CONFIG_JSON_PGVECTOR=$(echo "${CONFIG_JSON_BASE64}" | base64 -d)
          PGVECTOR_VALUE=$(echo ${CONFIG_JSON_PGVECTOR} | jq -r '.rag.pgvector')
          if [[ -n "${PGVECTOR_VALUE}" && "${PGVECTOR_VALUE}" != "null" ]]; then
            echo "Info: extract DSM PGVector parameters from config-json in XML"
            POSTGRES_USER=$(echo ${PGVECTOR_VALUE} | awk -F[:@/] '{print $4}')
            POSTGRES_PASSWORD=$(echo ${PGVECTOR_VALUE} | awk -F[:@/] '{print $5}')
            POSTGRES_HOST_IP=$(echo ${PGVECTOR_VALUE} | awk -F[:@/] '{print $6}')
            POSTGRES_PORT_NUMBER=$(echo ${PGVECTOR_VALUE} | awk -F[:@/] '{print $7}')
            POSTGRES_DB=$(echo ${PGVECTOR_VALUE} | awk -F[:@/] '{print $8}')
      
            for var in POSTGRES_USER POSTGRES_PASSWORD POSTGRES_HOST_IP POSTGRES_PORT_NUMBER POSTGRES_DB; do
              if [ -z "${!var}" ]; then
                error_exit "${var} is not set."
              fi
            done
          fi
      
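          # Detect the GPU type; when NIM_MODEL_PROFILE is "auto", pick the profile that matches the detected GPU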
          gpu_info=$(nvidia-smi -L)
          echo "Info: the detected GPU info, $gpu_info"
          if [[ ${NIM_MODEL_PROFILE} == "auto" ]]; then 
            case "${gpu_info}" in
              *A100*)
                NIM_MODEL_PROFILE="751382df4272eafc83f541f364d61b35aed9cce8c7b0c869269cea5a366cd08c"
                echo "Info: GPU type A100 detected. Setting tensorrt_llm-A100-fp16-tp1-throughput as the default NIM model profile."
                ;;
              *H100*)
                NIM_MODEL_PROFILE="cb52cbc73a6a71392094380f920a3548f27c5fcc9dab02a98dc1bcb3be9cf8d1"
                echo "Info: GPU type H100 detected. Setting tensorrt_llm-H100-fp16-tp1-throughput as the default NIM model profile."
                ;;
              *L40S*)
                NIM_MODEL_PROFILE="d8dd8af82e0035d7ca50b994d85a3740dbd84ddb4ed330e30c509e041ba79f80"
                echo "Info: GPU type L40S detected. Setting tensorrt_llm-L40S-fp16-tp1-throughput as the default NIM model profile."
                ;;
              *)
                NIM_MODEL_PROFILE="8835c31752fbc67ef658b20a9f78e056914fdef0660206d82f252d62fd96064d"
                echo "Info: No supported GPU type detected (A100, H100, L40S). Setting vllm as the default NIM model profile."
                ;;
            esac
          else
            echo "Info: using the NIM model profile provided by the user, $NIM_MODEL_PROFILE"
          fi
      
          RAG_URI="${ORG_NAME}/${ORG_TEAM_NAME}/${RAG_NAME}:${RAG_VERSION}"
          RAG_FOLDER="${RAG_NAME}_v${RAG_VERSION}"
          NGC_CLI_URL="https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions/${NGC_CLI_VERSION}/files/ngccli_linux.zip"
      
          if [ ! -f .initialize ]; then
            # clean up
            rm -rf compose.env ngc* ${RAG_NAME}* ${MODEL_DIRECTORY}* .initialize
      
            # install ngc-cli
            wget --content-disposition ${NGC_CLI_URL} -O ngccli_linux.zip && unzip -q ngccli_linux.zip
            export PATH=$(pwd)/ngc-cli:${PATH}
      
            APIKEY=""
            DEFAULT_REG_URI="nvcr.io"
      
            REGISTRY_URI_PATH=$(grep registry-uri /opt/dlvm/ovf-env.xml | sed -n 's/.*oe:value="\([^"]*\).*/\1/p')
            if [[ -z "${REGISTRY_URI_PATH}" ]]; then
              REGISTRY_URI_PATH=${DEFAULT_REG_URI}
              echo "Info: registry uri was empty. Using default: ${REGISTRY_URI_PATH}"
            fi
      
            if [[ "$(grep registry-uri /opt/dlvm/ovf-env.xml | sed -n 's/.*oe:value="\([^"]*\).*/\1/p')" == *"${DEFAULT_REG_URI}"* ]]; then
              APIKEY=$(grep registry-passwd /opt/dlvm/ovf-env.xml | sed -n 's/.*oe:value="\([^"]*\).*/\1/p')
            fi
      
            if [ -z "${APIKEY}" ]; then
                error_exit "No APIKEY found"
            fi
      
            # config ngc-cli
            mkdir -p ~/.ngc
      
            cat << EOF > ~/.ngc/config
            [CURRENT]
            apikey = ${APIKEY}
            format_type = ascii
            org = ${ORG_NAME}
            team = ${ORG_TEAM_NAME}
            ace = no-ace
          EOF
            
            # Extract registry URI if path contains '/'
            if [[ ${REGISTRY_URI_PATH} == *"/"* ]]; then
              REGISTRY_URI=$(echo "${REGISTRY_URI_PATH}" | cut -d'/' -f1)
            else
              REGISTRY_URI=${REGISTRY_URI_PATH}
            fi
      
            REGISTRY_USER=$(grep registry-user /opt/dlvm/ovf-env.xml | sed -n 's/.*oe:value="\([^"]*\).*/\1/p')
      
            # Docker login if credentials are provided
            if [[ -n "${REGISTRY_USER}" && -n "${APIKEY}" ]]; then
              docker login -u ${REGISTRY_USER} -p ${APIKEY} ${REGISTRY_URI}
            else
              echo "Warning: the ${REGISTRY_URI} registry's username and password are invalid, Skipping Docker login."
            fi
      
            # DockerHub login for general components
            DOCKERHUB_URI=$(grep registry-2-uri /opt/dlvm/ovf-env.xml | sed -n 's/.*oe:value="\([^"]*\).*/\1/p')
            DOCKERHUB_USERNAME=$(grep registry-2-user /opt/dlvm/ovf-env.xml | sed -n 's/.*oe:value="\([^"]*\).*/\1/p')
            DOCKERHUB_PASSWORD=$(grep registry-2-passwd /opt/dlvm/ovf-env.xml | sed -n 's/.*oe:value="\([^"]*\).*/\1/p')
      
            DOCKERHUB_URI=${DOCKERHUB_URI:-docker.io}
            if [[ -n "${DOCKERHUB_USERNAME}" && -n "${DOCKERHUB_PASSWORD}" ]]; then
              docker login -u ${DOCKERHUB_USERNAME} -p ${DOCKERHUB_PASSWORD} ${DOCKERHUB_URI}
            else
              echo "Warning: ${DOCKERHUB_URI} not logged in"
            fi
      
            # Download RAG files
            ngc registry resource download-version ${RAG_URI}
      
            mkdir -p /opt/data/${MODEL_DIRECTORY}
      
            # Update the docker-compose YAML files to correct the issue with GPU free/non-free status reporting
            /usr/bin/python3 -c "import yaml, json, sys; print(json.dumps(yaml.safe_load(sys.stdin.read())))" < "${RAG_FOLDER}/docker-compose-nim-ms.yaml"> docker-compose-nim-ms.json
            jq --arg profile "${NIM_MODEL_PROFILE}" \
               '.services."nemollm-inference".environment.NIM_MANIFEST_ALLOW_UNSAFE = "1" |
                .services."nemollm-inference".environment.NIM_MODEL_PROFILE = $profile |
                .services."nemollm-inference".deploy.resources.reservations.devices[0].device_ids = ["${LLM_MS_GPU_ID:-0}"] |
                del(.services."nemollm-inference".deploy.resources.reservations.devices[0].count)' docker-compose-nim-ms.json > temp.json && mv temp.json docker-compose-nim-ms.json
            /usr/bin/python3 -c "import yaml, json, sys; print(yaml.safe_dump(json.load(sys.stdin), default_flow_style=False, sort_keys=False))" < docker-compose-nim-ms.json > "${RAG_FOLDER}/docker-compose-nim-ms.yaml"
            rm -rf docker-compose-nim-ms.json
      
            # Update the docker-compose YAML files to configure PGVector as the default database
            /usr/bin/python3 -c "import yaml, json, sys; print(json.dumps(yaml.safe_load(sys.stdin.read())))" < "${RAG_FOLDER}/${RAG_APP}/docker-compose.yaml" > rag-app-multiturn-chatbot.json
            jq --arg vs_url "${POSTGRES_HOST_IP:-pgvector}:${POSTGRES_PORT_NUMBER:-5432}" \
               --arg pg_password "${POSTGRES_PASSWORD:-password}" \
               --arg pg_user "${POSTGRES_USER:-postgres}" \
               --arg pg_db "${POSTGRES_DB:-api}" \
               '.services."chain-server".environment.APP_VECTORSTORE_NAME = "pgvector" |
                .services."chain-server".environment.APP_VECTORSTORE_URL = $vs_url |
                .services."chain-server".environment.POSTGRES_PASSWORD = $pg_password |
                .services."chain-server".environment.POSTGRES_USER = $pg_user |
                .services."chain-server".environment.POSTGRES_DB = $pg_db' rag-app-multiturn-chatbot.json > temp.json && mv temp.json rag-app-multiturn-chatbot.json
            /usr/bin/python3 -c "import yaml, json, sys; print(yaml.safe_dump(json.load(sys.stdin), default_flow_style=False, sort_keys=False))" < rag-app-multiturn-chatbot.json > "${RAG_FOLDER}/${RAG_APP}/docker-compose.yaml"
            rm -rf rag-app-multiturn-chatbot.json
      
            # config compose.env
            cat << EOF > compose.env
            export MODEL_DIRECTORY="/opt/data/${MODEL_DIRECTORY}"
            export NGC_API_KEY=${APIKEY}
            export USERID=$(id -u)
            export LLM_MS_GPU_ID=${LLM_MS_GPU_ID}
            export EMBEDDING_MS_GPU_ID=${EMBEDDING_MS_GPU_ID}
          EOF
      
            if [[ -n "${PGVECTOR_VALUE}" && "${PGVECTOR_VALUE}" != "null" ]]; then 
              cat << EOF >> compose.env
              export POSTGRES_HOST_IP="${POSTGRES_HOST_IP}"
              export POSTGRES_PORT_NUMBER="${POSTGRES_PORT_NUMBER}"
              export POSTGRES_PASSWORD="${POSTGRES_PASSWORD}"
              export POSTGRES_USER="${POSTGRES_USER}"
              export POSTGRES_DB="${POSTGRES_DB}"
          EOF
            fi
          
            touch .initialize
      
            deploy_dcgm_exporter
          fi
      
          # start NGC RAG
          echo "Info: running the RAG application"
          source compose.env
          if [ -z "${PGVECTOR_VALUE}" ] || [ "${PGVECTOR_VALUE}" = "null" ]; then 
            echo "Info: running the pgvector container as the Vector Database"
            docker compose -f ${RAG_FOLDER}/${RAG_APP}/docker-compose.yaml --profile local-nim --profile pgvector up -d
          else
            echo "Info: using the provided DSM PGVector as the Vector Database"
            docker compose -f ${RAG_FOLDER}/${RAG_APP}/docker-compose.yaml --profile local-nim up -d
          fi
          
      - path: /opt/dlvm/utils.sh
        permissions: '0755'
        content: |
          #!/bin/bash
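          # Report a fatal error to VMware Tools guest info and stop the workload script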
          error_exit() {
            echo "Error: $1" >&2
            vmtoolsd --cmd "info-set guestinfo.vmservice.bootstrap.condition false, DLWorkloadFailure, $1"
            exit 1
          }
      
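          # Verify that a proxy URL, when set, uses one of the supported protocols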
          check_protocol() {
            local proxy_url=$1
            shift
            local supported_protocols=("$@")
            if [[ -n "${proxy_url}" ]]; then
              local protocol=$(echo "${proxy_url}" | awk -F '://' '{if (NF > 1) print $1; else print ""}')
              if [ -z "$protocol" ]; then
                echo "No specific protocol provided. Skipping protocol check."
                return 0
              fi
              local protocol_included=false
              for var in "${supported_protocols[@]}"; do
                if [[ "${protocol}" == "${var}" ]]; then
                  protocol_included=true
                  break
                fi
              done
              if [[ "${protocol_included}" == false ]]; then
                error_exit "Unsupported protocol: ${protocol}. Supported protocols are: ${supported_protocols[*]}"
              fi
            fi
          }
      
          # $@: list of supported protocols
          set_proxy() {
            local supported_protocols=("$@")
      
            CONFIG_JSON_BASE64=$(grep 'config-json' /opt/dlvm/ovf-env.xml | sed -n 's/.*oe:value="\([^"]*\).*/\1/p')
            CONFIG_JSON=$(echo ${CONFIG_JSON_BASE64} | base64 --decode)
      
            HTTP_PROXY_URL=$(echo "${CONFIG_JSON}" | jq -r '.http_proxy // empty')
            HTTPS_PROXY_URL=$(echo "${CONFIG_JSON}" | jq -r '.https_proxy // empty')
            if [[ $? -ne 0 || (-z "${HTTP_PROXY_URL}" && -z "${HTTPS_PROXY_URL}") ]]; then
              echo "Info: The config-json was parsed, but no proxy settings were found."
              return 0
            fi
      
            check_protocol "${HTTP_PROXY_URL}" "${supported_protocols[@]}"
            check_protocol "${HTTPS_PROXY_URL}" "${supported_protocols[@]}"
      
            if ! grep -q 'http_proxy' /etc/environment; then
              # Use an unquoted heredoc so the proxy variables are expanded by this shell
              sudo tee -a /etc/environment > /dev/null << EOF
          export http_proxy=${HTTP_PROXY_URL}
          export https_proxy=${HTTPS_PROXY_URL}
          export HTTP_PROXY=${HTTP_PROXY_URL}
          export HTTPS_PROXY=${HTTPS_PROXY_URL}
          export no_proxy=localhost,127.0.0.1
          EOF
              source /etc/environment
            fi
            
            # Configure Docker to use a proxy
            sudo mkdir -p /etc/systemd/system/docker.service.d
            sudo bash -c 'echo "[Service]
            Environment=\"HTTP_PROXY=${HTTP_PROXY_URL}\"
            Environment=\"HTTPS_PROXY=${HTTPS_PROXY_URL}\"
            Environment=\"NO_PROXY=localhost,127.0.0.1\"" > /etc/systemd/system/docker.service.d/proxy.conf'
            sudo systemctl daemon-reload
            sudo systemctl restart docker
      
            echo "Info: docker and system environment are now configured to use the proxy settings"
          }
      
          deploy_dcgm_exporter() {
            CONFIG_JSON_BASE64=$(grep 'config-json' /opt/dlvm/ovf-env.xml | sed -n 's/.*oe:value="\([^"]*\).*/\1/p')
            CONFIG_JSON=$(echo ${CONFIG_JSON_BASE64} | base64 --decode)
            DCGM_EXPORT_PUBLIC=$(echo "${CONFIG_JSON}" | jq -r '.export_dcgm_to_public // empty')
      
            DCGM_EXPORTER_IMAGE="$REGISTRY_URI_PATH/nvidia/k8s/dcgm-exporter"
            DCGM_EXPORTER_VERSION="3.2.5-3.1.8-ubuntu22.04"
            if [ -z "${DCGM_EXPORT_PUBLIC}" ] || [ "${DCGM_EXPORT_PUBLIC}" != "true" ]; then
              echo "Info: launching DCGM Exporter to collect vGPU metrics, listening only on localhost (127.0.0.1:9400)"
              docker run -d --gpus all --cap-add SYS_ADMIN -p 127.0.0.1:9400:9400 $DCGM_EXPORTER_IMAGE:$DCGM_EXPORTER_VERSION
            else
              echo "Info: launching DCGM Exporter to collect vGPU metrics, exposed on all network interfaces (0.0.0.0:9400)"
              docker run -d --gpus all --cap-add SYS_ADMIN -p 9400:9400 $DCGM_EXPORTER_IMAGE:$DCGM_EXPORTER_VERSION
            fi
          }
    2. Encode the cloud-init script in base64 format.
      Use a base64 encoding tool, such as https://decode64base.com/, to generate the encoded version of your cloud-init script.
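      Alternatively, you can encode the script locally. The following is a minimal sketch, assuming the cloud-init script is saved in a file named cloud-init.yaml (a hypothetical name), using the Linux base64 utility:

      # Produce a single-line base64 encoding of the cloud-init script
      base64 -w 0 cloud-init.yaml > cloud-init.b64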
    3. Create a configuration file in JSON format specifying the pgvector database details.
      {"rag": {"pgvector": "postgresql://pgvector_db_admin:encoded_pgvector_db_admin_password@pgvector_db_ip_address:5432/pgvector_db_name"}}

      If you must configure a proxy server for Internet access, add the http_proxy and https_proxy properties at the top level of this configuration JSON file, as shown in the example below.
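
      For example, with a hypothetical proxy endpoint proxy.example.com:3128, the configuration file looks as follows:

      {
        "rag": {"pgvector": "postgresql://pgvector_db_admin:encoded_pgvector_db_admin_password@pgvector_db_ip_address:5432/pgvector_db_name"},
        "http_proxy": "http://proxy.example.com:3128",
        "https_proxy": "http://proxy.example.com:3128"
      }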

    4. Deploy the deep learning VM, passing the base64-encoded cloud-init script to the user-data OVF property and the base64-encoded configuration JSON file to the config-json OVF property.
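
      For example, when deploying directly on the vSphere cluster, you can set both OVF properties by using the govc CLI. The following is a minimal sketch; the OVA path and file names are hypothetical and must be adapted to your environment.

      # Generate an import spec for the deep learning VM image
      govc import.spec /path/to/dl-vm.ova > dlvm-spec.json

      # In dlvm-spec.json, fill in the PropertyMapping entries for the two OVF properties:
      #   { "Key": "user-data",   "Value": "<base64-encoded cloud-init script>" }
      #   { "Key": "config-json", "Value": "<base64-encoded configuration JSON>" }

      # Deploy the VM with the customized spec
      govc import.ova -options=dlvm-spec.json /path/to/dl-vm.ova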