From 4d71208b9a44786aa962116041f910b44ce0512c Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Tue, 21 Apr 2026 15:35:10 +0530 Subject: [PATCH 1/3] Add Hive K8s Operators --- packaging/pom.xml | 7 + packaging/src/docker/entrypoint.sh | 2 +- packaging/src/docker/start-hive.sh | 1 + .../docker/storage/ozone/docker-compose.yml | 3 + packaging/src/kubernetes/Dockerfile | 26 + packaging/src/kubernetes/README.md | 860 ++++++++++++++++++ .../config/operator/deployment.yaml | 48 + .../config/rbac/cluster-role-binding.yaml | 26 + .../kubernetes/config/rbac/cluster-role.yaml | 47 + .../config/rbac/service-account.yaml | 19 + .../config/samples/hivecluster-full-ha.yaml | 81 ++ .../config/samples/hivecluster-minimal.yaml | 59 ++ packaging/src/kubernetes/pom.xml | 169 ++++ .../gen/hiveclusters.hive.apache.org-v1.yml | 500 ++++++++++ .../kubernetes/operator/HiveOperatorMain.java | 53 ++ .../dependent/HadoopConfigMapDependent.java | 66 ++ .../dependent/HiveDependentResource.java | 531 +++++++++++ .../HiveServer2ConfigMapDependent.java | 71 ++ .../HiveServer2DeploymentDependent.java | 233 +++++ .../HiveServer2ServiceDependent.java | 71 ++ .../dependent/LlapConfigMapDependent.java | 67 ++ .../dependent/LlapServiceDependent.java | 76 ++ .../dependent/LlapStatefulSetDependent.java | 179 ++++ .../MetastoreConfigMapDependent.java | 66 ++ .../MetastoreDeploymentDependent.java | 169 ++++ .../dependent/MetastoreServiceDependent.java | 68 ++ .../dependent/SchemaInitJobDependent.java | 145 +++ .../dependent/ScratchPvcDependent.java | 89 ++ .../dependent/TezAmServiceDependent.java | 61 ++ .../dependent/TezAmStatefulSetDependent.java | 177 ++++ .../condition/HiveServer2Precondition.java | 53 ++ .../condition/LlapEnabledCondition.java | 41 + .../condition/MetastoreEnabledCondition.java | 39 + .../condition/MetastoreReadyCondition.java | 49 + .../SchemaJobCompletedCondition.java | 48 + .../condition/TezAmEnabledCondition.java | 41 + .../operator/model/HiveCluster.java | 39 + 
.../operator/model/HiveClusterSpec.java | 93 ++ .../operator/model/HiveClusterStatus.java | 106 +++ .../operator/model/spec/DatabaseConfig.java | 46 + .../operator/model/spec/HadoopSpec.java | 29 + .../operator/model/spec/HiveServer2Spec.java | 66 ++ .../operator/model/spec/LlapSpec.java | 68 ++ .../operator/model/spec/MetastoreSpec.java | 69 ++ .../operator/model/spec/ProbeSpec.java | 35 + .../model/spec/ResourceRequirementsSpec.java | 42 + .../operator/model/spec/SecretKeyRef.java | 29 + .../operator/model/spec/TezAmSpec.java | 64 ++ .../operator/model/spec/ZookeeperSpec.java | 33 + .../model/status/ComponentStatus.java | 71 ++ .../reconciler/HiveClusterReconciler.java | 336 +++++++ .../operator/util/HadoopXmlBuilder.java | 71 ++ .../operator/util/HiveConfigBuilder.java | 179 ++++ .../hive/kubernetes/operator/util/Labels.java | 69 ++ .../src/kubernetes/src/resources/log4j2.xml | 29 + pom.xml | 2 + 56 files changed, 5716 insertions(+), 1 deletion(-) create mode 100644 packaging/src/kubernetes/Dockerfile create mode 100644 packaging/src/kubernetes/README.md create mode 100644 packaging/src/kubernetes/config/operator/deployment.yaml create mode 100644 packaging/src/kubernetes/config/rbac/cluster-role-binding.yaml create mode 100644 packaging/src/kubernetes/config/rbac/cluster-role.yaml create mode 100644 packaging/src/kubernetes/config/rbac/service-account.yaml create mode 100644 packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml create mode 100644 packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml create mode 100644 packaging/src/kubernetes/pom.xml create mode 100644 packaging/src/kubernetes/src/gen/hiveclusters.hive.apache.org-v1.yml create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java create mode 100644 
packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java create mode 100644 
packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveCluster.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterStatus.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/DatabaseConfig.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HadoopSpec.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ProbeSpec.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ResourceRequirementsSpec.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/SecretKeyRef.java create mode 100644 
packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ZookeeperSpec.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/ComponentStatus.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HadoopXmlBuilder.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/Labels.java create mode 100644 packaging/src/kubernetes/src/resources/log4j2.xml diff --git a/packaging/pom.xml b/packaging/pom.xml index 46949bd66b7f..df4d33309e31 100644 --- a/packaging/pom.xml +++ b/packaging/pom.xml @@ -27,6 +27,9 @@ .. apache-hive-${project.version}-jdbc.jar + + src/kubernetes + dist @@ -272,6 +275,10 @@ + + kubernetes + + diff --git a/packaging/src/docker/entrypoint.sh b/packaging/src/docker/entrypoint.sh index 656bf63ec601..5ec094edce31 100644 --- a/packaging/src/docker/entrypoint.sh +++ b/packaging/src/docker/entrypoint.sh @@ -153,7 +153,7 @@ function run_tezam { exit 1 fi # service_plugins_descriptor.json references org.apache.hadoop.hive.llap.tezplugins.* (hive-llap-tez, etc.) 
- tezam_cp="${HADOOP_CONF_DIR}:${TEZ_CONF_DIR}:${TEZ_SNAPSHOT_HOME}/*:${TEZ_HOME}/*:${TEZ_HOME}/lib/*:${HIVE_HOME}/lib/*:${HADOOP_HOME}/share/hadoop/common/*:${HADOOP_HOME}/share/hadoop/common/lib/*:${HADOOP_HOME}/share/hadoop/yarn/*:${HADOOP_HOME}/share/hadoop/yarn/lib/*:${HADOOP_HOME}/share/hadoop/hdfs/*:${HADOOP_HOME}/share/hadoop/hdfs/lib/*:${HADOOP_HOME}/share/hadoop/mapreduce/*:${HADOOP_HOME}/share/hadoop/mapreduce/lib/*:${HADOOP_CLASSPATH:-}" + tezam_cp="${HADOOP_CONF_DIR}:${TEZ_CONF_DIR}:${TEZ_SNAPSHOT_HOME}/*:${TEZ_HOME}/*:${TEZ_HOME}/lib/*:${HIVE_HOME}/lib/*:$("${HADOOP_HOME}/bin/hadoop" classpath)" local java_bin local tezam_java_opts diff --git a/packaging/src/docker/start-hive.sh b/packaging/src/docker/start-hive.sh index 82a4c9952458..a76eea2cf31f 100755 --- a/packaging/src/docker/start-hive.sh +++ b/packaging/src/docker/start-hive.sh @@ -44,6 +44,7 @@ for arg in "$@"; do export S3_ENDPOINT_URL="http://s3.ozone:9878" export AWS_ACCESS_KEY_ID="ozone" export AWS_SECRET_ACCESS_KEY="secret" + export HADOOP_OPTIONAL_TOOLS="hadoop-aws" ;; *) echo "Unknown option: $arg" diff --git a/packaging/src/docker/storage/ozone/docker-compose.yml b/packaging/src/docker/storage/ozone/docker-compose.yml index f5cf554b42c4..18a16b6d3138 100644 --- a/packaging/src/docker/storage/ozone/docker-compose.yml +++ b/packaging/src/docker/storage/ozone/docker-compose.yml @@ -34,6 +34,9 @@ x-common-config: OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "1" OZONE-SITE.XML_hdds.scm.safemode.healthy.pipeline.pct: "0" OZONE-SITE.XML_ozone.s3g.domain.name: "s3.ozone" + OZONE-SITE.XML_hdds.datanode.volume.min.free.space: "128MB" + OZONE-SITE.XML_hdds.scm.safemode.pipeline.creation: "false" + OZONE-SITE.XML_ozone.scm.container.size: "256MB" services: datanode: diff --git a/packaging/src/kubernetes/Dockerfile b/packaging/src/kubernetes/Dockerfile new file mode 100644 index 000000000000..9d688e67a7ac --- /dev/null +++ b/packaging/src/kubernetes/Dockerfile @@ -0,0 +1,26 @@ +# Licensed to the 
Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM eclipse-temurin:21-jre-ubi9-minimal + +ARG OPERATOR_JAR=target/hive-kubernetes-operator-*-shaded.jar + +WORKDIR /opt/hive-operator + +COPY ${OPERATOR_JAR} operator.jar + +USER 1000:1000 + +ENTRYPOINT ["java", "-jar", "operator.jar"] diff --git a/packaging/src/kubernetes/README.md b/packaging/src/kubernetes/README.md new file mode 100644 index 000000000000..6d5113a4a346 --- /dev/null +++ b/packaging/src/kubernetes/README.md @@ -0,0 +1,860 @@ + + +# Hive Kubernetes Operator + +A Java-based Kubernetes operator that manages Apache Hive clusters declaratively +using a single `HiveCluster` custom resource. Built with +[Java Operator SDK (JOSDK)](https://javaoperatorsdk.io/) and +[fabric8 Kubernetes client](https://github.com/fabric8io/kubernetes-client). + +## Features + +- **Single CRD** (`HiveCluster`) manages all Hive components +- **Four Hive services**: Metastore, HiveServer2, LLAP, and Tez AM +- **Storage-agnostic**: works with any Hadoop-compatible filesystem (S3A, + ABFS, GCS, HDFS, Ozone) via `hadoop.coreSiteOverrides` and `envVars` +- **Automatic dependency ordering**: schema init before Metastore, Metastore + before HiveServer2, etc. 
+- **Optional components**: LLAP and Tez AM are enabled/disabled via spec flags +- **External Metastore**: skip deploying the Metastore and point HiveServer2 at + an existing external Hive Metastore +- **Status reporting**: per-component readiness tracked on the CRD status + +## Architecture + +``` +HiveCluster CR + | + v +HiveClusterReconciler + | + +-- HadoopConfigMapDependent (core-site.xml with user-provided filesystem config) + +-- MetastoreConfigMapDependent (metastore-site.xml) + +-- HiveServer2ConfigMapDependent (hive-site.xml + tez-site.xml) + +-- SchemaInitJobDependent (schematool -initOrUpgradeSchema) + +-- MetastoreDeploymentDependent --> MetastoreServiceDependent + +-- HiveServer2DeploymentDependent --> HiveServer2ServiceDependent + +-- LlapStatefulSetDependent --> LlapServiceDependent (optional) + +-- ScratchPvcDependent (shared scratch PVC for HS2+TezAM, optional) + +-- TezAmStatefulSetDependent --> TezAmServiceDependent (optional) +``` + +**Startup order:** + +1. ConfigMaps (Hadoop, Metastore [if enabled], HiveServer2) +2. Schema Init Job (`schematool -initOrUpgradeSchema`) [if Metastore enabled] +3. Metastore Deployment + Service [if enabled] +4. HiveServer2 Deployment + Service +5. LLAP StatefulSet + Scratch PVC + Tez AM StatefulSet (if enabled) + +## Prerequisites + +- Kubernetes cluster (minikube, kind, EKS, GKE, etc.) +- `kubectl` configured to talk to the cluster +- Java 21+ and Maven 3.8+ (for building) +- A ZooKeeper instance accessible from the cluster +- A storage backend accessible from the cluster (S3A, ABFS, GCS, HDFS, or Ozone) + +## Build + +```bash +mvn clean package -pl packaging/src/kubernetes -DskipTests +``` + +This produces: + +| Artifact | Path | +|----------|------| +| Shaded JAR | `target/hive-kubernetes-operator-*-shaded.jar` | +| CRD YAML (v1) | `src/gen/hiveclusters.hive.apache.org-v1.yml` | + +The CRD YAML is auto-generated by fabric8 during compilation and copied to +`src/gen/` so it can be version-controlled. 
+ +## Build the Operator Docker Image + +Use the `-Pkubernetes` Maven profile to build the Docker image (the image is +tagged with the project version from the POM): + +```bash +mvn clean package -pl packaging/src/kubernetes -Pkubernetes -DskipTests +``` + +This builds the jar **and** runs: +``` +docker build -t apache/hive:operator-<project.version> . +``` + +Alternatively, build the image manually: + +```bash +cd packaging/src/kubernetes +export HIVE_VERSION=4.3.0-SNAPSHOT +docker build -t apache/hive:operator-${HIVE_VERSION} . +``` + +For **minikube**, build inside the minikube Docker daemon: + +```bash +eval $(minikube docker-env) +export HIVE_VERSION=4.3.0-SNAPSHOT +docker build -t apache/hive:operator-${HIVE_VERSION} . +``` + +For **kind**, load the image into the cluster: + +```bash +export HIVE_VERSION=4.3.0-SNAPSHOT +docker build -t apache/hive:operator-${HIVE_VERSION} . +kind load docker-image apache/hive:operator-${HIVE_VERSION} +``` + +## Install the CRD and Operator + +These steps are the same regardless of which deployment scenario you choose. + +### 1. Install the CRD + +```bash +cd packaging/src/kubernetes +kubectl apply -f src/gen/hiveclusters.hive.apache.org-v1.yml +``` + +Verify: + +```bash +kubectl get crd hiveclusters.hive.apache.org +``` + +### 2. Deploy RBAC and the Operator + +```bash +kubectl create namespace hive-operator + +kubectl apply -f config/rbac/service-account.yaml +kubectl apply -f config/rbac/cluster-role.yaml +kubectl apply -f config/rbac/cluster-role-binding.yaml + +export HIVE_VERSION=4.3.0-SNAPSHOT +envsubst < config/operator/deployment.yaml | kubectl apply -f - +``` + +Verify the operator is running: + +```bash +kubectl -n hive-operator get pods +``` + +### 3. Deploy ZooKeeper (if you don't have one) + +ZooKeeper is required for Tez session management. Skip this if you already +have a ZooKeeper instance. 
+ +```bash +helm repo add bitnami https://charts.bitnami.com/bitnami +helm install zookeeper bitnami/zookeeper \ + --set replicaCount=1 \ + --set auth.enabled=false \ + --set image.repository=bitnamilegacy/zookeeper \ + --set image.tag=3.9.3-debian-12-r21 \ + --set global.security.allowInsecureImages=true \ + --wait +``` + +This creates a Service named `zookeeper` on port `2181`. + +--- + +## Storage Setup + +The operator is **storage-agnostic** — it works with any Hadoop-compatible +filesystem. You provide the filesystem configuration via `spec.hadoop.coreSiteOverrides` +(for `core-site.xml` properties) and `spec.envVars` (for credentials injected +into all component pods). The operator does **not** deploy a storage cluster +itself. + +### Using Apache Ozone (S3A via Helm Chart) + +Apache Ozone provides an S3-compatible object store. Use the official Helm +chart to deploy it alongside the operator. + +#### Step 1: Install Ozone via Helm + +```bash +helm repo add ozone https://apache.github.io/ozone-helm-charts/ +helm install ozone ozone/ozone --version 0.2.0 --wait +``` + +For resource-constrained environments (e.g., CI, minikube), create a +`ozone-values.yaml`: + +```yaml +datanode: + replicas: 1 +env: +- name: OZONE-SITE.XML_hdds.datanode.volume.min.free.space + value: "256MB" +- name: OZONE-SITE.XML_hdds.scm.safemode.enabled + value: "false" +- name: OZONE-SITE.XML_ozone.scm.container.size + value: 128MB +- name: OZONE-SITE.XML_ozone.scm.block.size + value: 32MB +- name: OZONE-SITE.XML_ozone.server.default.replication + value: "1" +``` + +Then install with: + +```bash +helm install ozone ozone/ozone --version 0.2.0 --values ozone-values.yaml --wait +``` + +#### Step 2: Create the Ozone bucket + +```bash +kubectl exec statefulset/ozone-om -- ozone sh volume create /s3v +kubectl exec statefulset/ozone-om -- ozone sh bucket create /s3v/hive +``` + +#### Step 3: Configure the HiveCluster CR + +```yaml + hadoop: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + 
fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" +``` + +The Ozone Helm chart exposes the S3 Gateway as a Kubernetes Service named +`ozone-s3g-rest` on port `9878`. Default Ozone credentials are `ozone`/`ozone`. + +#### Teardown + +```bash +helm uninstall ozone +``` + +### Using MinIO (S3A) + +```yaml + hadoop: + coreSiteOverrides: + fs.defaultFS: "s3a://my-bucket" + fs.s3a.endpoint: "http://minio.minio-ns.svc:9000" + fs.s3a.path.style.access: "true" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: minio-creds + key: accessKey + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: minio-creds + key: secretKey +``` + +### Using AWS S3 + +```yaml + hadoop: + coreSiteOverrides: + fs.defaultFS: "s3a://my-bucket" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-s3-creds + key: accessKey + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-s3-creds + key: secretKey +``` + +### Using Azure ABFS + +```yaml + hadoop: + coreSiteOverrides: + fs.defaultFS: "abfss://container@account.dfs.core.windows.net" + fs.azure.account.auth.type.account.dfs.core.windows.net: "SharedKey" + fs.azure.account.key.account.dfs.core.windows.net: "$(AZURE_STORAGE_KEY)" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-azure" + - name: AZURE_STORAGE_KEY + valueFrom: + secretKeyRef: + name: azure-creds + key: storageKey +``` + +### Using Google Cloud Storage (GCS) + +First create a Secret from your service account key: + +```bash +kubectl create secret generic gcs-creds --from-file=key.json=/path/to/sa-key.json +``` + +Then configure the filesystem, download the GCS connector JAR, and mount the +key file 
into all pods: + +```yaml + hadoop: + coreSiteOverrides: + fs.defaultFS: "gs://my-bucket" + fs.gs.impl: "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem" + fs.gs.auth.type: "SERVICE_ACCOUNT_JSON_KEYFILE" + fs.gs.auth.service.account.json.keyfile: "/etc/gcs/key.json" + externalJars: + - "https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.25/gcs-connector-hadoop3-2.2.25-shaded.jar" + volumes: + - name: gcs-key + secret: + secretName: gcs-creds + volumeMounts: + - name: gcs-key + mountPath: /etc/gcs + readOnly: true +``` + +### Using HDFS + +```yaml + hadoop: + coreSiteOverrides: + fs.defaultFS: "hdfs://namenode:8020" +``` + +--- + +## Deployment Scenarios + +### Scenario 1: Minimal Cluster (Metastore + HiveServer2) + +**Use this when:** you want a basic Hive cluster backed by external +storage and PostgreSQL. + +#### Step 1: Deploy PostgreSQL (Bitnami Helm) + +```bash +helm repo add bitnami https://charts.bitnami.com/bitnami +helm install postgres bitnami/postgresql \ + --set auth.username=hive \ + --set auth.password=hive123 \ + --set auth.database=metastore \ + --wait +``` + +This creates a Service named `postgres-postgresql` on port `5432`. The password +is also stored in a Secret named `postgres-postgresql` under key `password`. + +Create the Secret the operator will reference: + +```bash +kubectl create secret generic hive-db-secret \ + --from-literal=password=hive123 +``` + +#### Step 2: Set up storage + +Follow the [Storage Setup](#storage-setup) section above to deploy your +storage backend (e.g., Ozone via Helm). 
+ +#### Step 3: Create the HiveCluster + +```bash +envsubst < config/samples/hivecluster-minimal.yaml | kubectl apply -f - +``` + +Or inline: + +```bash +kubectl apply -f - <<EOF +apiVersion: hive.apache.org/v1alpha1 +kind: HiveCluster +metadata: + name: my-hive +spec: + image: apache/hive:${HIVE_VERSION} + imagePullPolicy: IfNotPresent + + metastore: + replicas: 1 + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + warehouseDir: "/hive/warehouse" + + hiveServer2: + replicas: 1 + serviceType: ClusterIP + + zookeeper: + quorum: "zookeeper:2181" + + hadoop: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" +EOF +``` + +#### What happens + +The operator creates: + +| Resource | Purpose | +|----------|---------| +| `my-hive-hadoop-config` ConfigMap | `core-site.xml` with filesystem configuration from `coreSiteOverrides` | +| `my-hive-metastore-config` ConfigMap | `metastore-site.xml` with warehouse dir and DB settings | +| `my-hive-hiveserver2-config` ConfigMap | `hive-site.xml` + `tez-site.xml` | +| `my-hive-schema-init` | Job that runs `schematool -initOrUpgradeSchema` | +| `my-hive-metastore` | Metastore Deployment + Service (port 9083) | +| `my-hive-hiveserver2` | HiveServer2 Deployment + Service (port 10000) | + +--- + +### Scenario 2: External RDBMS + +**Use this when:** you have an existing database instance (e.g. Amazon RDS, +Cloud SQL, a corporate database, or a different database engine like MySQL/Oracle). 
+ +**What you provide:** the JDBC URL, credentials, and driver JAR URL for your +existing database. + +#### Step 1: Create the database password Secret + +```bash +kubectl create secret generic hive-db-secret \ + --from-literal=password=<your-db-password> +``` + +#### Step 2: Create the HiveCluster + +Point the `database` section at your external RDBMS: + +```yaml +apiVersion: hive.apache.org/v1alpha1 +kind: HiveCluster +metadata: + name: my-hive +spec: + image: apache/hive:${HIVE_VERSION} + + metastore: + replicas: 1 + database: + type: postgres + url: "jdbc:postgresql://my-rds-host.us-east-1.rds.amazonaws.com:5432/metastore" + driver: "org.postgresql.Driver" + username: hive_admin + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + warehouseDir: "/hive/warehouse" + + hiveServer2: + replicas: 1 + serviceType: ClusterIP + + zookeeper: + quorum: "zookeeper:2181" + + hadoop: + coreSiteOverrides: + fs.defaultFS: "s3a://my-bucket" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: s3-creds + key: accessKey + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: s3-creds + key: secretKey +``` + +The `driverJarUrl` field tells the operator to add an init container that +downloads the JDBC driver JAR at pod startup. This works for any URL +(Maven Central, internal artifact repo, etc.). 
+ +#### Supported databases + +| Database | `type` | Example `url` | Example `driver` | +|----------|--------|---------------|------------------| +| PostgreSQL | `postgres` | `jdbc:postgresql://host:5432/metastore` | `org.postgresql.Driver` | +| MySQL | `mysql` | `jdbc:mysql://host:3306/metastore` | `com.mysql.cj.jdbc.Driver` | +| Oracle | `oracle` | `jdbc:oracle:thin:@host:1521/FREEPDB1` | `oracle.jdbc.OracleDriver` | +| Derby | `derby` | *(embedded, no URL needed)* | *(auto-detected)* | + + +### Scenario 3: External Hive Metastore + +**Use this when:** you already have an existing Hive Metastore running outside +the cluster (or managed separately) and only want the operator to deploy +HiveServer2 (and optionally LLAP / Tez AM). + +Set `spec.metastore.enabled: false` and provide the thrift URI of your external +Metastore via `spec.metastore.externalUri`. The operator will skip deploying the +Metastore Deployment, Service, schema-init Job, and metastore ConfigMap entirely. +HiveServer2 will connect directly to the external Metastore. 
+ + +For example: + +```yaml +apiVersion: hive.apache.org/v1alpha1 +kind: HiveCluster +metadata: + name: my-hive +spec: + image: apache/hive:${HIVE_VERSION} + imagePullPolicy: IfNotPresent + + metastore: + enabled: false + externalUri: "thrift://host.docker.internal:9083" + + hiveServer2: + replicas: 2 + serviceType: ClusterIP + resources: + requestsMemory: "1Gi" + limitsMemory: "2Gi" + configOverrides: + hive.server2.enable.doAs: "false" + + llap: + enabled: true + replicas: 2 + executors: 1 + memoryMb: 1024 + serviceHosts: "@llap0" + resources: + requestsMemory: "2Gi" + limitsMemory: "3Gi" + + tezAm: + enabled: true + replicas: 2 + + zookeeper: + quorum: "zookeeper:2181" + + hadoop: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" +``` + +#### What happens + +When `metastore.enabled` is `false`: + +| Skipped | Reason | +|---------|--------| +| `my-hive-schema-init` Job | No managed DB to initialize | +| `my-hive-metastore` Deployment + Service | External Metastore handles this | +| `my-hive-metastore-config` ConfigMap | Not needed | + +The operator still creates HiveServer2 (and optionally LLAP / Tez AM) with +`hive.metastore.uris` pointing at the external URI. The status reports +`MetastoreReady: True` with reason `ExternalMetastore`. + +--- + +### Scenario 4: Full Cluster (LLAP + Tez AM) + +**Use this when:** you want all four Hive services running - Metastore, +HiveServer2, LLAP daemons, and a standalone Tez Application Master. + +When `tezAm.enabled` is `true`, HiveServer2 is configured to use external Tez +sessions via ZooKeeper (`hive.server2.tez.use.external.sessions=true`). The +standalone Tez AM registers itself in ZooKeeper and HiveServer2 discovers it +through the registry. 
+ +When `tezAm.enabled` is `false` (the default), HiveServer2 runs Tez in local +mode (`tez.local.mode=true`), where the Tez DAG executes inside the HiveServer2 +JVM itself. + +#### Step 1: Deploy PostgreSQL and Storage + +See [Scenario 1 Step 1](#step-1-deploy-postgresql-bitnami-helm) for PostgreSQL and +[Storage Setup](#storage-setup) for your storage backend. + +#### Step 2: Create the HiveCluster + +```bash +envsubst < config/samples/hivecluster-full-ha.yaml | kubectl apply -f - +``` + +#### What this creates + +With all components enabled, the operator creates approximately 12 resources in non-HA mode: + +| Category | Resources | +|----------|-----------| +| Hive | Schema-init Job (1), Metastore (1), HiveServer2 (2), LLAP (2), Tez AM (1), scratch PVC (1) | +| You deployed | PostgreSQL (1), ZooKeeper (1), storage backend | + +--- + +## Monitor + +```bash +# Watch pods come up in order +kubectl get pods -w + +# Check HiveCluster status and conditions +kubectl get hiveclusters +kubectl describe hivecluster my-hive + +# Operator logs +kubectl -n hive-operator logs -f deployment/hive-operator +``` + +## Connect to HiveServer2 + +```bash +# Port-forward the thrift port +kubectl port-forward svc/my-hive-hiveserver2 10000:10000 + +# Connect with Beeline +beeline -u "jdbc:hive2://localhost:10000/" + +# Connect to HiveServer2 UI +kubectl port-forward svc/my-hive-hiveserver2 10002:10002 + +# Then open http://localhost:10002/ in a browser to access the UI. +``` + +Or exec into the HiveServer2 pod directly: + +```bash +kubectl exec -it deployment/my-hive-hiveserver2 -- beeline -u "jdbc:hive2://my-hive-hiveserver2:10000/" +``` + +If the HiveServer2 Service type is `LoadBalancer` or `NodePort`, use the +external address directly instead of port-forwarding. 
+ + + +## CRD Reference + +### Top-level spec + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `spec.image` | string | | Hive Docker image | +| `spec.imagePullPolicy` | string | `IfNotPresent` | Image pull policy | +| `spec.envVars` | list | | Environment variables injected into all component pods (e.g., storage credentials). Supports both literal values and `valueFrom.secretKeyRef`. | +| `spec.externalJars` | list | | URLs of JARs downloaded into all pods at startup (e.g., GCS connector, ABFS connector). The Docker entrypoint automatically adds them to the classpath. | +| `spec.volumes` | list | | Volumes added to all component pods (e.g., Secrets containing keytabs or service account keys) | +| `spec.volumeMounts` | list | | Volume mounts added to all component containers (e.g., mounting a GCS key at `/etc/gcs/key.json`) | + +### Metastore (`spec.metastore`) + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `enabled` | boolean | `true` | Whether the operator deploys and manages a Metastore. Set `false` to use an external Metastore. | +| `externalUri` | string | | Thrift URI of the external Metastore (required when `enabled` is `false`, e.g. `thrift://host:9083`) | +| `replicas` | int | `1` | Number of Metastore replicas (ignored when `enabled` is `false`) | +| `warehouseDir` | string | | Warehouse directory (e.g. 
`s3a://hive/warehouse`) | +| `configOverrides` | map | | Extra `metastore-site.xml` properties | +| `resources.*` | object | | CPU/memory requests and limits | + +### Metastore Database (`spec.metastore.database`) + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `type` | string | `derby` | DB type: `postgres`, `mysql`, `derby` | +| `url` | string | | JDBC connection URL | +| `driver` | string | | JDBC driver class name | +| `username` | string | | Database username | +| `passwordSecretRef.name` | string | | Kubernetes Secret name containing the password | +| `passwordSecretRef.key` | string | | Key within the Secret | +| `driverJarUrl` | string | | URL to download the JDBC driver JAR at pod startup | + +### HiveServer2 (`spec.hiveServer2`) + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `replicas` | int | `1` | Number of HiveServer2 replicas | +| `serviceType` | string | `ClusterIP` | Kubernetes Service type (`ClusterIP`, `NodePort`, `LoadBalancer`) | +| `thriftPort` | int | `10000` | Thrift port | +| `webUiPort` | int | `10002` | Web UI port | +| `configOverrides` | map | | Extra `hive-site.xml` properties | +| `resources.*` | object | | CPU/memory requests and limits | + +### LLAP (`spec.llap`) + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `enabled` | boolean | `false` | Enable LLAP daemons | +| `replicas` | int | `1` | Number of LLAP daemon replicas | +| `executors` | int | `1` | Executors per daemon | +| `memoryMb` | int | `2048` | Memory per daemon (MB) | +| `serviceHosts` | string | | LLAP service hosts identifier (e.g. 
`@llap0`) | +| `resources.*` | object | | CPU/memory requests and limits | + +### Tez AM (`spec.tezAm`) + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `enabled` | boolean | `false` | Enable standalone Tez Application Master | +| `replicas` | int | `1` | Number of Tez AM replicas | +| `scratchStorageSize` | string | `1Gi` | Storage size for the shared scratch PVC (ReadWriteMany) mounted on HS2 and TezAM at `/opt/hive/scratch` | +| `scratchStorageClassName` | string | | StorageClass for the shared scratch PVC. Must support ReadWriteMany. If empty, uses cluster default. | +| `resources.*` | object | | CPU/memory requests and limits | + +### ZooKeeper (`spec.zookeeper`) + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `quorum` | string | `zookeeper:2181` | ZooKeeper connection string | + + +### Hadoop (`spec.hadoop`) + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `coreSiteOverrides` | map | | `core-site.xml` properties for filesystem configuration (e.g., `fs.defaultFS`, `fs.s3a.*`, `fs.azure.*`, `fs.gs.*`) | + +### Resource requirements + +All components support a `resources` object with these fields: + +| Field | Example | +|-------|---------| +| `resources.requestsCpu` | `"500m"` | +| `resources.requestsMemory` | `"1Gi"` | +| `resources.limitsCpu` | `"2"` | +| `resources.limitsMemory` | `"4Gi"` | + +## How Storage Configuration Works + +The operator is storage-agnostic — it does not hardcode any filesystem-specific +logic. You configure storage using four mechanisms: + +1. **`spec.hadoop.coreSiteOverrides`** — Filesystem properties written to + `core-site.xml` (e.g., `fs.defaultFS`, `fs.s3a.endpoint`, `fs.azure.*`, + `fs.gs.*`). This ConfigMap is projected into all component pods. + +2. **`spec.envVars`** — Environment variables injected into all component + containers (Metastore, HiveServer2, LLAP, Tez AM).
Use this for credentials + that should not appear in ConfigMaps, and for `HADOOP_OPTIONAL_TOOLS` which + tells the Hadoop classpath to include optional connector JARs (e.g., + `hadoop-aws` for S3A, `hadoop-azure` for ABFS). Supports both literal values + and `valueFrom.secretKeyRef` for Kubernetes Secrets. + +3. **`spec.externalJars`** — URLs of connector JARs (GCS, ABFS, etc.) + downloaded via an init container into `/tmp/ext-jars`. The Hive Docker + entrypoint automatically copies them to `$HIVE_HOME/lib/` at startup. + Required for filesystems whose implementation class is not bundled in the + Hive/Hadoop image. + +4. **`spec.volumes` / `spec.volumeMounts`** — Volumes and mounts added to all + component pods. Use this to mount credential files (GCS service account JSON, + Kerberos keytabs, etc.) from Kubernetes Secrets into a known path. + +This design supports any Hadoop-compatible filesystem: S3A (AWS, MinIO, Ozone), +ABFS (Azure), GCS (Google Cloud), HDFS, or Ozone native. + +### Overrides + +Properties in `spec.hadoop.coreSiteOverrides` populate `core-site.xml`. +Properties in `spec.metastore.configOverrides` and +`spec.hiveServer2.configOverrides` override values in `metastore-site.xml` and +`hive-site.xml` respectively. + +## Cleanup + +```bash +# Delete the HiveCluster (removes all managed pods, services, etc.) 
+kubectl delete hivecluster my-hive + +# Remove the operator +envsubst < config/operator/deployment.yaml | kubectl delete -f - +kubectl delete -f config/rbac/cluster-role-binding.yaml +kubectl delete -f config/rbac/cluster-role.yaml +kubectl delete -f config/rbac/service-account.yaml +kubectl delete namespace hive-operator + +# Remove the CRD +kubectl delete crd hiveclusters.hive.apache.org +``` +### Remove Everything + +```bash +kubectl delete hivecluster my-hive --ignore-not-found +envsubst < config/operator/deployment.yaml | kubectl delete --ignore-not-found -f - +kubectl delete -f config/rbac/ --ignore-not-found +kubectl delete namespace hive-operator --ignore-not-found +kubectl delete secret hive-db-secret --ignore-not-found +kubectl delete crd hiveclusters.hive.apache.org --ignore-not-found +helm uninstall postgres --ignore-not-found 2>/dev/null || true +kubectl delete pvc data-postgres-postgresql-0 --ignore-not-found +helm uninstall zookeeper --ignore-not-found 2>/dev/null || true +kubectl delete pvc data-zookeeper-0 --ignore-not-found +helm uninstall ozone --ignore-not-found 2>/dev/null || true +``` diff --git a/packaging/src/kubernetes/config/operator/deployment.yaml b/packaging/src/kubernetes/config/operator/deployment.yaml new file mode 100644 index 000000000000..b7d9625daacf --- /dev/null +++ b/packaging/src/kubernetes/config/operator/deployment.yaml @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: hive-operator + namespace: hive-operator + labels: + app.kubernetes.io/name: hive-kubernetes-operator + app.kubernetes.io/managed-by: kubectl +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: hive-kubernetes-operator + template: + metadata: + labels: + app.kubernetes.io/name: hive-kubernetes-operator + spec: + serviceAccountName: hive-operator + containers: + - name: hive-operator + image: apache/hive:operator-${HIVE_VERSION} + imagePullPolicy: IfNotPresent + env: + - name: OPERATOR_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + memory: 512Mi diff --git a/packaging/src/kubernetes/config/rbac/cluster-role-binding.yaml b/packaging/src/kubernetes/config/rbac/cluster-role-binding.yaml new file mode 100644 index 000000000000..26940ba6b37d --- /dev/null +++ b/packaging/src/kubernetes/config/rbac/cluster-role-binding.yaml @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: hive-operator-rolebinding +subjects: + - kind: ServiceAccount + name: hive-operator + namespace: hive-operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: hive-operator-role diff --git a/packaging/src/kubernetes/config/rbac/cluster-role.yaml b/packaging/src/kubernetes/config/rbac/cluster-role.yaml new file mode 100644 index 000000000000..22cd4f268b60 --- /dev/null +++ b/packaging/src/kubernetes/config/rbac/cluster-role.yaml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: hive-operator-role +rules: + # HiveCluster CRD management + - apiGroups: ["hive.apache.org"] + resources: ["hiveclusters", "hiveclusters/status", "hiveclusters/finalizers"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Deployments and StatefulSets + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Jobs for schema initialization + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Services, ConfigMaps, and PersistentVolumeClaims + - apiGroups: [""] + resources: ["services", "configmaps", "persistentvolumeclaims"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Secrets: read-only for DB password references + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list", "watch"] + # Events for status reporting + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] + # Pods: read-only for readiness checking + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] diff --git a/packaging/src/kubernetes/config/rbac/service-account.yaml b/packaging/src/kubernetes/config/rbac/service-account.yaml new file mode 100644 index 000000000000..bc48726076b9 --- /dev/null +++ b/packaging/src/kubernetes/config/rbac/service-account.yaml @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: v1 +kind: ServiceAccount +metadata: + name: hive-operator + namespace: hive-operator diff --git a/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml b/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml new file mode 100644 index 000000000000..aaf3fbebcec8 --- /dev/null +++ b/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Full HiveCluster (HA): All four services with LLAP, TezAM, and multiple replicas +apiVersion: hive.apache.org/v1alpha1 +kind: HiveCluster +metadata: + name: my-hive +spec: + image: apache/hive:${HIVE_VERSION} + imagePullPolicy: IfNotPresent + + metastore: + replicas: 2 + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + warehouseDir: "/hive/warehouse" + configOverrides: + metastore.catalog.servlet.port: "9001" + metastore.catalog.servlet.auth: "none" + + hiveServer2: + replicas: 2 + serviceType: ClusterIP + resources: + requestsMemory: "1Gi" + limitsMemory: "2Gi" + configOverrides: + hive.server2.enable.doAs: "false" + + llap: + enabled: true + replicas: 2 + executors: 1 + memoryMb: 1024 + serviceHosts: "@llap0" + resources: + requestsMemory: "2Gi" + limitsMemory: "3Gi" + + tezAm: + enabled: true + replicas: 2 + + zookeeper: + quorum: "zookeeper:2181" + + # Hadoop filesystem configuration — any storage backend (S3A, ABFS, GCS, HDFS, Ozone) + hadoop: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + + # Environment variables injected into all component pods (credentials, JVM options, etc.) 
+ envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" diff --git a/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml b/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml new file mode 100644 index 000000000000..617cc73836ff --- /dev/null +++ b/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Minimal HiveCluster: Metastore + HiveServer2 with external S3-compatible storage +apiVersion: hive.apache.org/v1alpha1 +kind: HiveCluster +metadata: + name: my-hive +spec: + image: apache/hive:${HIVE_VERSION} + imagePullPolicy: IfNotPresent + + metastore: + replicas: 1 + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + warehouseDir: "/hive/warehouse" + + hiveServer2: + replicas: 1 + serviceType: ClusterIP + + zookeeper: + quorum: "zookeeper:2181" + + # Hadoop filesystem configuration — any storage backend (S3A, ABFS, GCS, HDFS, Ozone) + hadoop: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + + # Environment variables injected into all component pods (credentials, JVM options, etc.) + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" diff --git a/packaging/src/kubernetes/pom.xml b/packaging/src/kubernetes/pom.xml new file mode 100644 index 000000000000..b45b1b76968a --- /dev/null +++ b/packaging/src/kubernetes/pom.xml @@ -0,0 +1,169 @@ + + + + 4.0.0 + + org.apache.hive + hive + 4.3.0-SNAPSHOT + ../../../pom.xml + + hive-kubernetes-operator + jar + Hive Kubernetes Operator + Kubernetes operator for managing Apache Hive clusters + + ../../.. 
+ + + + + io.javaoperatorsdk + operator-framework + ${josdk.version} + + + + io.fabric8 + crd-generator-apt + ${fabric8.version} + provided + + + + org.apache.logging.log4j + log4j-slf4j-impl + ${log4j2.version} + + + org.apache.logging.log4j + log4j-core + ${log4j2.version} + + + + src/java + + + src/resources + + + + + org.apache.maven.plugins + maven-compiler-plugin + + full + + + io.fabric8 + crd-generator-apt + ${fabric8.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + ${maven.shade.plugin.version} + + + package + + shade + + + false + true + shaded + + + org.apache.hive.kubernetes.operator.HiveOperatorMain + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + org.apache.maven.plugins + maven-antrun-plugin + + + copy-crd-to-src-gen + compile + + run + + + + + + + + + + + + + + + + + + kubernetes + + + + org.codehaus.mojo + exec-maven-plugin + + + docker-build + package + + exec + + + docker + + build + -t + apache/hive:operator-${project.version} + . + + + + + + + + + + diff --git a/packaging/src/kubernetes/src/gen/hiveclusters.hive.apache.org-v1.yml b/packaging/src/kubernetes/src/gen/hiveclusters.hive.apache.org-v1.yml new file mode 100644 index 000000000000..23ccb2c5112a --- /dev/null +++ b/packaging/src/kubernetes/src/gen/hiveclusters.hive.apache.org-v1.yml @@ -0,0 +1,500 @@ +# Generated by Fabric8 CRDGenerator, manual edits might get overwritten! 
+apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: hiveclusters.hive.apache.org +spec: + group: hive.apache.org + names: + kind: HiveCluster + plural: hiveclusters + shortNames: + - hc + singular: hivecluster + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + spec: + properties: + envVars: + description: "Environment variables injected into all component pods\ + \ (e.g., storage credentials, custom JVM options)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + externalJars: + description: "External JARs (URLs) downloaded into all component pods\ + \ and added to HADOOP_CLASSPATH (e.g., GCS connector, ABFS connector)" + items: + type: string + type: array + hadoop: + description: Hadoop/core-site.xml configuration overrides + properties: + coreSiteOverrides: + additionalProperties: + type: string + description: Key-value pairs written into core-site.xml + type: object + type: object + hiveServer2: + description: HiveServer2 component configuration + properties: + configOverrides: + additionalProperties: + type: string + description: Additional configuration overrides as key-value pairs + type: object + externalJars: + description: 'List of URIs to external JARs to download and add + to HS2 classpath ' + items: + type: string + type: array + extraVolumeMounts: + description: Additional volume mounts for the container + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + extraVolumes: + description: "Additional volumes to attach to the pod (e.g., for\ + \ keytabs or truststores)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + livenessProbe: + description: Liveness probe configuration + properties: + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. 
+ type: integer + initialDelaySeconds: + description: Number of seconds after the container has started + before probes are initiated. + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. + type: integer + timeoutSeconds: + description: Number of seconds after which the probe times + out. + type: integer + type: object + readinessProbe: + description: Readiness probe configuration + properties: + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. + type: integer + initialDelaySeconds: + description: Number of seconds after the container has started + before probes are initiated. + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. + type: integer + timeoutSeconds: + description: Number of seconds after which the probe times + out. + type: integer + type: object + replicas: + description: Number of replicas + type: integer + resources: + description: Resource requirements for pods + properties: + limitsCpu: + description: "CPU limit (e.g. 2, 1000m)" + type: string + limitsMemory: + description: "Memory limit (e.g. 2Gi, 1024Mi)" + type: string + requestsCpu: + description: "CPU request (e.g. 500m, 1)" + type: string + requestsMemory: + description: "Memory request (e.g. 
1Gi, 512Mi)" + type: string + type: object + serviceType: + description: "Kubernetes Service type: ClusterIP, LoadBalancer,\ + \ or NodePort" + type: string + thriftPort: + description: HiveServer2 Thrift port + type: integer + webUiPort: + description: HiveServer2 Web UI port + type: integer + type: object + x-kubernetes-preserve-unknown-fields: true + image: + description: Docker image to use for all Hive components + type: string + imagePullPolicy: + description: "Image pull policy: Always, Never, or IfNotPresent" + type: string + llap: + description: LLAP daemon configuration. Disabled by default. + properties: + configOverrides: + additionalProperties: + type: string + description: Additional configuration overrides as key-value pairs + type: object + enabled: + description: Whether LLAP is enabled + type: boolean + executors: + description: Number of LLAP executors per daemon + type: integer + extraVolumeMounts: + description: Additional volume mounts for the container + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + extraVolumes: + description: "Additional volumes to attach to the pod (e.g., for\ + \ keytabs or truststores)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + memoryMb: + description: Memory in MB per LLAP daemon instance + type: integer + readinessProbe: + description: Readiness probe configuration + properties: + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. + type: integer + initialDelaySeconds: + description: Number of seconds after the container has started + before probes are initiated. + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. 
+ type: integer + timeoutSeconds: + description: Number of seconds after which the probe times + out. + type: integer + type: object + replicas: + description: Number of replicas + type: integer + resources: + description: Resource requirements for pods + properties: + limitsCpu: + description: "CPU limit (e.g. 2, 1000m)" + type: string + limitsMemory: + description: "Memory limit (e.g. 2Gi, 1024Mi)" + type: string + requestsCpu: + description: "CPU request (e.g. 500m, 1)" + type: string + requestsMemory: + description: "Memory request (e.g. 1Gi, 512Mi)" + type: string + type: object + serviceHosts: + description: LLAP service hosts identifier for ZooKeeper registration + type: string + type: object + x-kubernetes-preserve-unknown-fields: true + metastore: + description: Metastore component configuration + properties: + configOverrides: + additionalProperties: + type: string + description: Additional configuration overrides as key-value pairs + type: object + database: + description: Database connection configuration for the metastore + backend + properties: + driver: + description: JDBC driver class name + type: string + driverJarUrl: + description: "URL to download the JDBC driver jar, e.g. 
https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + type: string + passwordSecretRef: + description: Reference to a Kubernetes Secret containing the + database password + properties: + key: + description: Key within the Secret + type: string + name: + description: Name of the Kubernetes Secret + type: string + type: object + type: + description: "Database type: derby, mysql, postgres, mssql,\ + \ or oracle" + type: string + url: + description: JDBC connection URL + type: string + username: + description: Database username + type: string + type: object + enabled: + description: Whether the operator should deploy and manage a Metastore + type: boolean + externalUri: + description: Thrift URI of the external Metastore (if enabled + is false) + type: string + extraVolumeMounts: + description: Additional volume mounts for the container + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + extraVolumes: + description: "Additional volumes to attach to the pod (e.g., for\ + \ keytabs or truststores)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + livenessProbe: + description: Liveness probe configuration + properties: + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. + type: integer + initialDelaySeconds: + description: Number of seconds after the container has started + before probes are initiated. + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. + type: integer + timeoutSeconds: + description: Number of seconds after which the probe times + out. 
+ type: integer + type: object + readinessProbe: + description: Readiness probe configuration + properties: + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. + type: integer + initialDelaySeconds: + description: Number of seconds after the container has started + before probes are initiated. + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. + type: integer + timeoutSeconds: + description: Number of seconds after which the probe times + out. + type: integer + type: object + replicas: + description: Number of replicas + type: integer + resources: + description: Resource requirements for pods + properties: + limitsCpu: + description: "CPU limit (e.g. 2, 1000m)" + type: string + limitsMemory: + description: "Memory limit (e.g. 2Gi, 1024Mi)" + type: string + requestsCpu: + description: "CPU request (e.g. 500m, 1)" + type: string + requestsMemory: + description: "Memory request (e.g. 1Gi, 512Mi)" + type: string + type: object + warehouseDir: + description: Warehouse directory path + type: string + type: object + x-kubernetes-preserve-unknown-fields: true + tezAm: + description: Tez Application Master configuration. Disabled by default. 
+ properties: + configOverrides: + additionalProperties: + type: string + description: Additional configuration overrides as key-value pairs + type: object + enabled: + description: Whether Tez AM is enabled + type: boolean + extraVolumeMounts: + description: Additional volume mounts for the container + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + extraVolumes: + description: "Additional volumes to attach to the pod (e.g., for\ + \ keytabs or truststores)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + replicas: + description: Number of replicas + type: integer + resources: + description: Resource requirements for pods + properties: + limitsCpu: + description: "CPU limit (e.g. 2, 1000m)" + type: string + limitsMemory: + description: "Memory limit (e.g. 2Gi, 1024Mi)" + type: string + requestsCpu: + description: "CPU request (e.g. 500m, 1)" + type: string + requestsMemory: + description: "Memory request (e.g. 1Gi, 512Mi)" + type: string + type: object + scratchStorageClassName: + description: "StorageClass for the shared scratch PVC. Must support\ + \ ReadWriteMany access. If null, uses cluster default." 
+ type: string + scratchStorageSize: + description: Storage size for the shared scratch PVC (ReadWriteMany) + mounted on HS2 and TezAM at /opt/hive/scratch + type: string + type: object + x-kubernetes-preserve-unknown-fields: true + volumeMounts: + description: "Volume mounts added to all component containers (e.g.,\ + \ mounting a GCS key file at /etc/gcs/key.json)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + volumes: + description: "Volumes added to all component pods (e.g., Secrets containing\ + \ keytabs or service account keys)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + zookeeper: + description: External ZooKeeper connection details (not managed by + this operator) + properties: + quorum: + description: ZooKeeper quorum connection string + type: string + type: object + type: object + x-kubernetes-preserve-unknown-fields: true + status: + properties: + conditions: + items: + properties: + lastTransitionTime: + type: string + message: + type: string + observedGeneration: + type: integer + reason: + type: string + status: + type: string + type: + type: string + type: object + type: array + hiveServer2: + properties: + desiredReplicas: + type: integer + phase: + type: string + readyReplicas: + type: integer + type: object + llap: + properties: + desiredReplicas: + type: integer + phase: + type: string + readyReplicas: + type: integer + type: object + metastore: + properties: + desiredReplicas: + type: integer + phase: + type: string + readyReplicas: + type: integer + type: object + observedGeneration: + type: integer + tezAm: + properties: + desiredReplicas: + type: integer + phase: + type: string + readyReplicas: + type: integer + type: object + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java 
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java
new file mode 100644
index 000000000000..ce4cfa6052c2
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator;
+
+import io.javaoperatorsdk.operator.Operator;
+import org.apache.hive.kubernetes.operator.reconciler.HiveClusterReconciler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Entry point for the Hive Kubernetes Operator process. */
+public final class HiveOperatorMain {
+
+  private static final Logger LOG = LoggerFactory.getLogger(HiveOperatorMain.class);
+
+  /** Not instantiable; this class only hosts {@link #main}. */
+  private HiveOperatorMain() {
+  }
+
+  /**
+   * Builds the operator, registers the {@link HiveClusterReconciler} and starts it; the
+   * success message is logged as soon as {@code start()} returns, not at shutdown.
+   */
+  public static void main(String[] args) {
+    LOG.info("Starting Hive Kubernetes Operator");
+    // Turn off JOSDK's SSA-based matching (and the "previous" annotation) for dependents.
+    // The SSA matcher diffs against managedFields; where those are absent or incomplete
+    // (common with Docker Desktop and some K8s distributions) it always reports
+    // "not matched", so every dependent is re-applied on every reconciliation and the
+    // operator loops forever. Comparing the desired spec directly is immune to this.
+    final Operator hiveOperator = new Operator(config -> config
+        .withSSABasedCreateUpdateMatchForDependentResources(false)
+        .withPreviousAnnotationForDependentResources(false));
+    hiveOperator.register(new HiveClusterReconciler());
+    hiveOperator.start();
+    LOG.info("Hive Kubernetes Operator started successfully");
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java
new file mode 100644
index 000000000000..ede10d7e8036
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.dependent;
+
+import java.util.Map;
+
+import io.fabric8.kubernetes.api.model.ConfigMap;
+import io.fabric8.kubernetes.api.model.ConfigMapBuilder;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder;
+import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder;
+import org.apache.hive.kubernetes.operator.util.Labels;
+
+/** Manages the Hadoop core-site.xml ConfigMap for filesystem configuration. */
+@KubernetesDependent(
+    labelSelector = "app.kubernetes.io/component=hadoop-config,"
+        + "app.kubernetes.io/managed-by=hive-kubernetes-operator"
+)
+public class HadoopConfigMapDependent extends HiveDependentResource<ConfigMap, HiveCluster> {
+
+  public static final String COMPONENT = "hadoop-config";
+
+  public HadoopConfigMapDependent() {
+    super(ConfigMap.class);
+  }
+
+  /**
+   * Builds the ConfigMap whose {@code core-site.xml} entry is rendered from the cluster spec.
+   */
+  @Override
+  protected ConfigMap desired(HiveCluster hiveCluster, Context<HiveCluster> context) {
+    Map<String, String> props = HiveConfigBuilder.getHadoopCoreSite(hiveCluster.getSpec());
+
+    return new ConfigMapBuilder()
+        .withNewMetadata()
+        .withName(resourceName(hiveCluster))
+        .withNamespace(hiveCluster.getMetadata().getNamespace())
+        .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
+        .endMetadata()
+        .addToData("core-site.xml", HadoopXmlBuilder.buildXml(props))
+        .build();
+  }
+
+  /** Returns the ConfigMap resource name for this HiveCluster. */
+  public static String resourceName(HiveCluster hiveCluster) {
+    return hiveCluster.getMetadata().getName() + "-hadoop-config";
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java
new file mode 100644
index 000000000000..851fb7bb7836
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java
@@ -0,0 +1,531 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.ContainerBuilder; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.EnvVarBuilder; +import io.fabric8.kubernetes.api.model.HasMetadata; +import io.fabric8.kubernetes.api.model.Quantity; +import io.fabric8.kubernetes.api.model.ResourceRequirements; +import io.fabric8.kubernetes.api.model.Probe; +import io.fabric8.kubernetes.api.model.ProbeBuilder; +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.ResourceRequirementsBuilder; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeBuilder; +import io.fabric8.kubernetes.api.model.VolumeMount; +import io.fabric8.kubernetes.api.model.VolumeMountBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.Matcher; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig; +import org.apache.hive.kubernetes.operator.model.spec.ResourceRequirementsSpec; 
+
+import org.apache.hive.kubernetes.operator.model.spec.SecretKeyRef;
+import org.apache.hive.kubernetes.operator.model.spec.ProbeSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Base class for all Hive operator dependent resources.
+ * <p>
+ * Overrides {@link #getSecondaryResource} to use this dependent's own
+ * event source instead of the generic type-based lookup. This is
+ * required because JOSDK 4.9.x's default implementation calls
+ * {@code context.getSecondaryResource(type)} which throws when
+ * multiple dependents manage the same Kubernetes resource type
+ * (e.g. multiple ConfigMap or Service dependents).
+ */
+public abstract class HiveDependentResource<R extends HasMetadata, P extends HasMetadata>
+    extends CRUDKubernetesDependentResource<R, P> {
+
+  private static final Logger LOG = LoggerFactory.getLogger(HiveDependentResource.class);
+  private static final ObjectMapper MAPPER = new ObjectMapper()
+      .configure(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS, true);
+
+  protected static final String CONF_MOUNT_PATH = "/etc/hive/conf";
+  protected static final String HIVE_CONF_DIR = "/opt/hive/conf";
+  protected static final String EXT_JARS_PATH = "/tmp/ext-jars";
+
+  /**
+   * Stores SHA-256 hashes of the last desired spec that was applied
+   * for each resource, keyed by kind/namespace/name. This lets us skip
+   * updates when the desired state has not changed, avoiding the
+   * annotation writes and generation bumps that cause infinite
+   * reconciliation loops on Docker Desktop Kubernetes. The cache is
+   * held in memory only, so it starts empty in every operator process.
+   */
+  private static final ConcurrentHashMap<String, String> LAST_DESIRED_HASHES = new ConcurrentHashMap<>();
+
+  protected HiveDependentResource(Class<R> resourceType) {
+    super(resourceType);
+  }
+
+  /**
+   * Disable Server-Side Apply. SSA on Docker Desktop Kubernetes causes
+   * dual ReplicaSet creation (two SSA applies within the same second
+   * produce different pod template hashes). Standard create/update
+   * combined with our custom hash-based {@link #match} is sufficient.
+   */
+  @Override
+  protected boolean useSSA(Context<P> context) {
+    return false;
+  }
+
+  /**
+   * Looks the secondary resource up through this dependent's own event source rather than
+   * by resource type, so several dependents of the same Kubernetes type can coexist.
+   */
+  @Override
+  public Optional<R> getSecondaryResource(P primary, Context<P> context) {
+    return eventSource().flatMap(es -> es.getSecondaryResource(primary));
+  }
+
+  /**
+   * Custom match that compares an SHA-256 hash of the desired resource
+   * spec against the last applied hash. Overrides the 3-arg entry
+   * point because that is what JOSDK's reconcile loop actually calls.
+   * <p>
+   * The parent's 3-arg match delegates to a 5-arg method that calls
+   * {@code addMetadata()} unconditionally — writing the
+   * {@code javaoperatorsdk.io/previous} annotation on every
+   * reconciliation. On Docker Desktop, that annotation write bumps
+   * {@code metadata.generation}, which triggers a new informer event,
+   * causing an infinite reconciliation loop.
+   * <p>
+   * By intercepting here we avoid both the annotation write and the
+   * false-positive diffs from K8s-injected defaults (protocol: TCP,
+   * terminationGracePeriodSeconds, etc.) when the desired spec has
+   * not actually changed.
+   */
+  @Override
+  public Matcher.Result<R> match(R actual, P primary, Context<P> context) {
+    R desired = desired(primary, context);
+    String resourceKey = desired.getKind()
+        + "/" + desired.getMetadata().getNamespace()
+        + "/" + desired.getMetadata().getName();
+    String desiredHash = computeHash(desired);
+    if (actual == null) {
+      if (desiredHash != null) {
+        String previousHash = LAST_DESIRED_HASHES.get(resourceKey);
+        if (Objects.equals(previousHash, desiredHash)) {
+          // Resource was created in a previous reconciliation but
+          // the informer hasn't indexed it yet. Returning false
+          // would trigger another SSA apply, which fires another
+          // informer event, creating an infinite reconciliation
+          // loop on Docker Desktop. Skip the re-creation.
+          LOG.debug("Resource {} already created (informer lag), "
+              + "skipping re-create", resourceKey);
+          return Matcher.Result.computed(true, desired);
+        }
+        // First creation — cache the hash so the next
+        // reconciliation can detect informer lag.
+        LOG.info("Creating resource {}", resourceKey);
+        LAST_DESIRED_HASHES.put(resourceKey, desiredHash);
+      }
+      return Matcher.Result.computed(false, desired);
+    }
+    if (desiredHash == null) {
+      // Serialization failed — delegate to parent which will
+      // call addMetadata + the real matcher
+      return super.match(actual, primary, context);
+    }
+    // Jobs and PVCs are immutable after creation — never update.
+    String kind = actual.getKind();
+    if ("Job".equals(kind) || "PersistentVolumeClaim".equals(kind)) {
+      LAST_DESIRED_HASHES.put(resourceKey, desiredHash);
+      return Matcher.Result.computed(true, desired);
+    }
+    String previousHash = LAST_DESIRED_HASHES.get(resourceKey);
+    if (previousHash == null) {
+      // First reconciliation after operator start — the resource
+      // already exists so seed the cache without triggering an
+      // update. This prevents a gratuitous rolling update caused
+      // by K8s default-value injection (protocol: TCP, etc.).
+      // NOTE(review): a spec change made while the operator was down is skipped here
+      // as well, because the cache is empty after a restart — confirm this is acceptable.
+      LOG.info("Seeding hash for existing resource {}, skipping update",
+          resourceKey);
+      LAST_DESIRED_HASHES.put(resourceKey, desiredHash);
+      return Matcher.Result.computed(true, desired);
+    }
+    if (desiredHash.equals(previousHash)) {
+      LOG.debug("Desired spec unchanged for {}, skipping update",
+          resourceKey);
+      return Matcher.Result.computed(true, desired);
+    }
+    LOG.info("Desired spec changed for {}, will update", resourceKey);
+    LAST_DESIRED_HASHES.put(resourceKey, desiredHash);
+    return Matcher.Result.computed(false, desired);
+  }
+
+  /**
+   * Hashes the resource's JSON form after sorting object keys and object arrays.
+   * Returns {@code null} if that fails; {@link #match} then defers to the parent for existing resources.
+   */
+  private String computeHash(R resource) {
+    try {
+      JsonNode tree = MAPPER.valueToTree(resource);
+      sortJsonNode(tree);
+      return sha256(MAPPER.writeValueAsString(tree));
+    } catch (Exception e) {
+      LOG.warn("Failed to compute hash for resource {}",
+          resource.getMetadata().getName(), e);
+      return null;
+    }
+  }
+
+  /** Recursively sort all object node keys for deterministic JSON. */
+  private static void sortJsonNode(JsonNode node) {
+    if (node.isObject()) {
+      ObjectNode obj = (ObjectNode) node;
+      TreeMap<String, JsonNode> sorted = new TreeMap<>();
+      Iterator<String> fieldNames = obj.fieldNames();
+      while (fieldNames.hasNext()) {
+        String name = fieldNames.next();
+        JsonNode child = obj.get(name);
+        sortJsonNode(child);
+        sorted.put(name, child);
+      }
+      obj.removeAll();
+      sorted.forEach(obj::set);
+    } else if (node.isArray()) {
+      ArrayNode arr = (ArrayNode) node;
+      for (int i = 0; i < arr.size(); i++) {
+        sortJsonNode(arr.get(i));
+      }
+      sortArrayNode(arr);
+    }
+  }
+
+  /**
+   * Sort array elements by a stable key to make hashing order-independent.
+   * Uses "name" field if present (env vars, volumes, containers, ports),
+   * falls back to "mountPath" (volume mounts), then serialized form.
+   */
+  private static void sortArrayNode(ArrayNode arr) {
+    if (arr.size() <= 1 || !arr.get(0).isObject()) {
+      return;
+    }
+
+    List<JsonNode> sortedElements = StreamSupport.stream(arr.spliterator(), false)
+        .sorted(Comparator.comparing(node ->
+            node.has("name") ? node.get("name").asText() :
+            node.has("mountPath") ? node.get("mountPath").asText() :
+            node.toString()
+        ))
+        .collect(Collectors.toList());
+
+    arr.removeAll();
+    sortedElements.forEach(arr::add);
+  }
+
+  /**
+   * Computes a SHA-256 hash of the given input strings.
+   * Used to annotate pod templates so that config changes trigger rolling updates.
+   * Null inputs are skipped; the digest is returned as lowercase hex.
+   * @throws IllegalStateException if the JVM provides no SHA-256 digest
+   */
+  protected static String sha256(String... inputs) {
+    MessageDigest digest;
+    try {
+      digest = MessageDigest.getInstance("SHA-256");
+    } catch (java.security.NoSuchAlgorithmException e) {
+      // A constant fallback value would hide config changes from the rollout hash.
+      throw new IllegalStateException("SHA-256 message digest is not available", e);
+    }
+    for (String input : inputs) {
+      if (input != null) {
+        digest.update(input.getBytes(StandardCharsets.UTF_8));
+      }
+    }
+    byte[] hash = digest.digest();
+    StringBuilder sb = new StringBuilder(64);
+    for (byte b : hash) {
+      sb.append(String.format("%02x", b));
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Builds the database connection env vars: DB_DRIVER, DBPASSWORD
+   * (from SecretKeyRef), and SERVICE_OPTS with javax.jdo connection
+   * properties. Shared by MetastoreDeploymentDependent and
+   * SchemaInitJobDependent.
+   */
+  protected static List<EnvVar> buildDbEnvVars(DatabaseConfig db) {
+    List<EnvVar> envVars = new ArrayList<>();
+    envVars.add(new EnvVar("DB_DRIVER", db.type(), null));
+
+    // DBPASSWORD must be defined before SERVICE_OPTS so that
+    // Kubernetes $(DBPASSWORD) interpolation resolves correctly.
+    SecretKeyRef passwordRef = db.passwordSecretRef();
+    if (passwordRef != null) {
+      envVars.add(new EnvVarBuilder()
+          .withName("DBPASSWORD")
+          .withNewValueFrom()
+          .withNewSecretKeyRef()
+          .withName(passwordRef.name())
+          .withKey(passwordRef.key())
+          .endSecretKeyRef()
+          .endValueFrom()
+          .build());
+    }
+
+    StringBuilder serviceOpts = new StringBuilder();
+    if (db.url() != null) {
+      serviceOpts.append("-Djavax.jdo.option.ConnectionURL=")
+          .append(db.url());
+    }
+    if (db.driver() != null) {
+      serviceOpts.append(" -Djavax.jdo.option.ConnectionDriverName=")
+          .append(db.driver());
+    }
+    if (db.username() != null) {
+      serviceOpts.append(" -Djavax.jdo.option.ConnectionUserName=")
+          .append(db.username());
+    }
+    if (passwordRef != null) {
+      serviceOpts.append(" -Djavax.jdo.option.ConnectionPassword=$(DBPASSWORD)");
+    }
+    if (!serviceOpts.isEmpty()) {
+      envVars.add(new EnvVar("SERVICE_OPTS", serviceOpts.toString().trim(), null));
+    }
+    return envVars;
+  }
+
+  /** Builds a projected Volume merging multiple ConfigMaps. */
+  protected static Volume buildProjectedConfigVolume(
+      String volumeName, String... configMapNames) {
+    List<io.fabric8.kubernetes.api.model.VolumeProjection>
+        projections = new ArrayList<>();
+    for (String cmName : configMapNames) {
+      projections.add(
+          new io.fabric8.kubernetes.api.model.VolumeProjectionBuilder()
+              .withNewConfigMap().withName(cmName).endConfigMap()
+              .build());
+    }
+    return new VolumeBuilder()
+        .withName(volumeName)
+        .withNewProjected()
+        .withSources(projections)
+        .endProjected()
+        .build();
+  }
+
+  /**
+   * Populates volume mounts and volumes for the Metastore pod spec
+   * (shared by MetastoreDeploymentDependent and SchemaInitJobDependent).
+   * Adds the projected hive-config volume (merging metastore + hadoop
+   * ConfigMaps). External JARs (JDBC driver + global externalJars)
+   * should be handled separately via {@link #addExternalJars}.
+   */
+  protected static void buildMetastoreVolumes(
+      HiveCluster hiveCluster, List<VolumeMount> volumeMounts, List<Volume> volumes) {
+    volumeMounts.add(new VolumeMountBuilder()
+        .withName("hive-config")
+        .withMountPath(CONF_MOUNT_PATH).build());
+
+    volumes.add(buildProjectedConfigVolume("hive-config",
+        MetastoreConfigMapDependent.resourceName(hiveCluster),
+        HadoopConfigMapDependent.resourceName(hiveCluster)));
+  }
+
+  /** Builds Kubernetes ResourceRequirements from the operator's spec. */
+  protected static ResourceRequirements buildResources(ResourceRequirementsSpec spec) {
+    if (spec == null) {
+      return new ResourceRequirements();
+    }
+    ResourceRequirementsBuilder builder = new ResourceRequirementsBuilder();
+    if (spec.requestsCpu() != null) {
+      builder.addToRequests("cpu", new Quantity(spec.requestsCpu()));
+    }
+    if (spec.requestsMemory() != null) {
+      builder.addToRequests("memory", new Quantity(spec.requestsMemory()));
+    }
+    if (spec.limitsCpu() != null) {
+      builder.addToLimits("cpu", new Quantity(spec.limitsCpu()));
+    }
+    if (spec.limitsMemory() != null) {
+      builder.addToLimits("memory", new Quantity(spec.limitsMemory()));
+    }
+    return builder.build();
+  }
+
+  /**
+   * Builds an init container that downloads external JARs via wget
+   * (for http/https URLs) or hadoop fs (for HDFS/cloud paths) into the
+   * mount path of the first volume mount. Every JAR location is
+   * shell-quoted before it is spliced into the bash command line.
+   * @throws IllegalArgumentException if {@code volumeMounts} is null or empty
+   */
+  protected static Container buildExternalJarsInitContainer(
+      String image, List<String> externalJars,
+      List<EnvVar> envVars, List<VolumeMount> volumeMounts,
+      String containerName) {
+    if (volumeMounts == null || volumeMounts.isEmpty()) {
+      throw new IllegalArgumentException(
+          "No volume mount to download external JARs into for " + containerName);
+    }
+    // Determine target directory from the first volume mount
+    String targetDir = volumeMounts.get(0).getMountPath();
+
+    StringBuilder cmd = new StringBuilder();
+    cmd.append("export HADOOP_CONF_DIR=").append(CONF_MOUNT_PATH).append(" && ");
+
+    for (String jarUrl : externalJars) {
+      String quotedJar = shellQuote(jarUrl);
+      if (jarUrl.startsWith("http://") || jarUrl.startsWith("https://")) {
+        cmd.append("wget -q -P ").append(targetDir).append(' ').append(quotedJar).append(" && ");
+      } else {
+        cmd.append("hadoop fs -copyToLocal ").append(quotedJar).append(' ').append(targetDir).append("/ && ");
+      }
+    }
+    cmd.append("echo 'All external JARs downloaded successfully.'");
+
+    return new ContainerBuilder()
+        .withName(containerName)
+        .withImage(image)
+        .withCommand("/bin/bash", "-c", cmd.toString())
+        .withEnv(envVars)
+        .withVolumeMounts(volumeMounts)
+        .build();
+  }
+
+  /** Wraps a value in single quotes for bash, escaping any embedded single quote. */
+  private static String shellQuote(String value) {
+    return "'" + value.replace("'", "'\\''") + "'";
+  }
+
+  /**
+   * Replaces the directory-level CONF_MOUNT_PATH volume mount with
+   * individual subPath mounts into HIVE_CONF_DIR (/opt/hive/conf/).
+   * <p>
+   * This avoids the broken-symlink problem: Kubernetes projected volumes
+   * use internal timestamped directories that rotate on ConfigMap updates.
+   * The Hive Docker entrypoint symlinks resolved paths (not the stable
+   * {@code ..data/} link), so symlinks break when the directory rotates.
+   * subPath mounts place files directly without symlink indirection.
+   * <p>
+   * Call this AFTER {@link #addExternalJars} so the init container
+   * can still find the CONF_MOUNT_PATH mount.
+   */
+  protected static void replaceConfMountWithSubPaths(
+      List<VolumeMount> volumeMounts, String volumeName, String... fileNames) {
+    volumeMounts.removeIf(vm -> vm.getMountPath().equals(CONF_MOUNT_PATH));
+    for (String file : fileNames) {
+      volumeMounts.add(new VolumeMountBuilder()
+          .withName(volumeName)
+          .withMountPath(HIVE_CONF_DIR + "/" + file)
+          .withSubPath(file)
+          .build());
+    }
+  }
+
+  /**
+   * Adds external JAR download init container, volume, and
+   * volume mount. Downloads to /tmp/ext-jars so the native
+   * Hive entrypoint.sh automatically copies them to $HIVE_HOME/lib.
+   */
+  protected static void addExternalJars(
+      String image, List<String> jars,
+      List<Container> initContainers, List<VolumeMount> volumeMounts,
+      List<Volume> volumes, List<EnvVar> envVars) {
+    if (jars == null || jars.isEmpty()) {
+      return;
+    }
+
+    VolumeMount extMount = new VolumeMountBuilder()
+        .withName("ext-jars")
+        .withMountPath(EXT_JARS_PATH).build();
+
+    // Add volume mount for the main container
+    volumeMounts.add(extMount);
+
+    // Add emptyDir volume
+    volumes.add(new VolumeBuilder()
+        .withName("ext-jars")
+        .withNewEmptyDir().endEmptyDir().build());
+
+    // Build init container with config mount + ext-jars mount
+    List<VolumeMount> initMounts = new ArrayList<>();
+    initMounts.add(extMount);
+    for (VolumeMount vm : volumeMounts) {
+      if (vm.getMountPath().equals(CONF_MOUNT_PATH)) {
+        initMounts.add(vm);
+        break;
+      }
+    }
+
+    initContainers.add(
+        buildExternalJarsInitContainer(image, jars, envVars, initMounts, "download-ext-jars"));
+  }
+
+  /**
+   * Builds a TCP socket probe using user-provided overrides or fallback defaults.
+   */
+  protected static Probe buildTcpProbe(int port, ProbeSpec spec, int defaultInitialDelay, int defaultPeriod,
+      int defaultFailureThreshold) {
+    int initialDelay =
+        (spec != null && spec.initialDelaySeconds() != null) ? spec.initialDelaySeconds() : defaultInitialDelay;
+    int period = (spec != null && spec.periodSeconds() != null) ? spec.periodSeconds() : defaultPeriod;
+    int failureThreshold =
+        (spec != null && spec.failureThreshold() != null) ? spec.failureThreshold() : defaultFailureThreshold;
+
+    ProbeBuilder builder = new ProbeBuilder()
+        .withNewTcpSocket()
+        .withPort(new IntOrString(port))
+        .endTcpSocket()
+        .withInitialDelaySeconds(initialDelay)
+        .withPeriodSeconds(period)
+        .withFailureThreshold(failureThreshold);
+
+    if (spec != null && spec.timeoutSeconds() != null) {
+      builder.withTimeoutSeconds(spec.timeoutSeconds());
+    }
+    if (spec != null && spec.successThreshold() != null) {
+      builder.withSuccessThreshold(spec.successThreshold());
+    }
+    return builder.build();
+  }
+
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java
new file mode 100644
index 000000000000..088cfbe2fa2a
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.dependent;
+
+import java.util.Map;
+
+import io.fabric8.kubernetes.api.model.ConfigMap;
+import io.fabric8.kubernetes.api.model.ConfigMapBuilder;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+import org.apache.hive.kubernetes.operator.model.HiveClusterSpec;
+import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder;
+import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder;
+import org.apache.hive.kubernetes.operator.util.Labels;
+
+/** Manages the hive-site.xml ConfigMap for HiveServer2. */
+@KubernetesDependent(
+    labelSelector = "app.kubernetes.io/component=hiveserver2,"
+        + "app.kubernetes.io/managed-by=hive-kubernetes-operator"
+)
+public class HiveServer2ConfigMapDependent extends HiveDependentResource<ConfigMap, HiveCluster> {
+
+  public static final String COMPONENT = "hiveserver2";
+
+  public HiveServer2ConfigMapDependent() {
+    super(ConfigMap.class);
+  }
+
+  /**
+   * Builds the ConfigMap with two entries rendered from the cluster spec:
+   * {@code hive-site.xml} for HiveServer2 and {@code tez-site.xml} for Tez.
+   */
+  @Override
+  protected ConfigMap desired(HiveCluster hiveCluster, Context<HiveCluster> context) {
+    HiveClusterSpec spec = hiveCluster.getSpec();
+    Map<String, String> props = HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec);
+    Map<String, String> tezProps = HiveConfigBuilder.getTezSite(spec);
+
+    return new ConfigMapBuilder()
+        .withNewMetadata()
+        .withName(resourceName(hiveCluster))
+        .withNamespace(hiveCluster.getMetadata().getNamespace())
+        .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
+        .endMetadata()
+        .addToData("hive-site.xml", HadoopXmlBuilder.buildXml(props))
+        .addToData("tez-site.xml", HadoopXmlBuilder.buildXml(tezProps))
+        .build();
+  }
+
+  /** Returns the ConfigMap resource name for this HiveCluster. */
+  public static String resourceName(HiveCluster hiveCluster) {
+    return hiveCluster.getMetadata().getName() + "-hiveserver2-config";
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java
new file mode 100644
index 000000000000..29e3db3f31f0
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.ContainerPort; +import io.fabric8.kubernetes.api.model.ContainerPortBuilder; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Probe; +import io.fabric8.kubernetes.api.model.apps.Deployment; +import io.fabric8.kubernetes.api.model.apps.DeploymentBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the Kubernetes Deployment for HiveServer2. 
*/ +@KubernetesDependent( + labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator" +) +public class HiveServer2DeploymentDependent + extends HiveDependentResource { + + public static final String COMPONENT = "hiveserver2"; + private static final String SCRATCH_MOUNT_PATH = "/opt/hive/scratch"; + + public HiveServer2DeploymentDependent() { + super(Deployment.class); + } + + /** Builds the desired HiveServer2 Deployment from the HiveCluster spec: container env vars, thrift/webui ports, readiness and liveness probes, config and scratch volumes, the init containers filled in by addExternalJars, and a config-hash pod annotation. */ @Override + protected Deployment desired(HiveCluster hiveCluster, + Context context) { + HiveClusterSpec spec = hiveCluster.getSpec(); + HiveServer2Spec hs2 = spec.hiveServer2(); + Map selectorLabels = + Labels.selectorForComponent(hiveCluster, COMPONENT); + + List envVars = new ArrayList<>(); + envVars.add(new EnvVar("SERVICE_NAME", "hiveserver2", null)); + envVars.add(new EnvVar("IS_RESUME", "true", null)); + envVars.add(new EnvVar("TEZ_AM_EXTERNAL_ID", + "tez-session-hs2", null)); + + // User-provided env vars (storage credentials, etc.) + if (spec.envVars() != null) { + envVars.addAll(spec.envVars()); + } + + // Env vars consumed by the Hive Docker entrypoint.sh to + // configure Tez execution mode at container startup. + if (spec.tezAm().isEnabled()) { + envVars.add(new EnvVar("HIVE_SERVER2_TEZ_USE_EXTERNAL_SESSIONS", + "true", null)); + envVars.add(new EnvVar("TEZ_FRAMEWORK_MODE", + "STANDALONE_ZOOKEEPER", null)); + envVars.add(new EnvVar("HIVE_ZOOKEEPER_QUORUM", + spec.zookeeper().quorum(), null)); + } + + if (spec.llap().isEnabled()) { + envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS", + spec.llap().serviceHosts(), null)); + } + + /* Point at the "[cluster]-metastore" Service on port 9083 when the metastore is enabled, otherwise at the user-supplied external URI. */ String metastoreUri = spec.metastore().isEnabled() ?
+ "thrift://" + hiveCluster.getMetadata().getName() + "-metastore:9083" : + spec.metastore().externalUri(); + /* NOTE(review): if metastoreUri is null or empty, the options appended below all start with a space, so SERVICE_OPTS gets a leading blank; confirm the entrypoint tolerates that. */ StringBuilder serviceOpts = new StringBuilder(); + if (metastoreUri != null && !metastoreUri.isEmpty()) { + serviceOpts.append("-Dhive.metastore.uris=").append(metastoreUri); + } + if (spec.llap().isEnabled()) { + serviceOpts.append(" -Dhive.execution.mode=llap"); + serviceOpts.append(" -Dhive.llap.daemon.service.hosts=") + .append(spec.llap().serviceHosts()); + } + if (spec.tezAm().isEnabled()) { + serviceOpts.append(" -Dhive.zookeeper.quorum=") + .append(spec.zookeeper().quorum()); + } + envVars.add(new EnvVar("SERVICE_OPTS", + serviceOpts.toString(), null)); + + List ports = List.of( + new ContainerPortBuilder() + .withName("thrift") + .withContainerPort(hs2.thriftPort()).build(), + new ContainerPortBuilder() + .withName("webui") + .withContainerPort(hs2.webUiPort()).build() + ); + + /* Both probes are built by buildTcpProbe against the thrift port. NOTE(review): the trailing integers are presumably defaults used when the spec omits the probe; confirm against buildTcpProbe. */ Probe readinessProbe = buildTcpProbe(hs2.thriftPort(), hs2.readinessProbe(), 15, 10, 3); + Probe livenessProbe = buildTcpProbe(hs2.thriftPort(), hs2.livenessProbe(), 120, 30, 10); + + boolean tezAmEnabled = spec.tezAm().isEnabled(); + + // Build volume mounts and volumes lists up front so the + // Deployment is constructed in a single builder chain. + // Using editFirstContainer() caused JOSDK SSA comparison + // mismatches that triggered infinite reconciliation loops.
+ List volumeMounts = + new ArrayList<>(); + volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder() + .withName("hive-config").withMountPath(CONF_MOUNT_PATH).build()); + + List volumes = + new ArrayList<>(); + volumes.add(buildProjectedConfigVolume("hive-config", + HiveServer2ConfigMapDependent.resourceName(hiveCluster), + HadoopConfigMapDependent.resourceName(hiveCluster))); + + /* The scratch PVC (named by ScratchPvcDependent.resourceName) is mounted at SCRATCH_MOUNT_PATH only when the Tez AM is enabled. */ if (tezAmEnabled) { + volumeMounts.add( + new io.fabric8.kubernetes.api.model.VolumeMountBuilder() + .withName("scratch") + .withMountPath(SCRATCH_MOUNT_PATH).build()); + volumes.add(new io.fabric8.kubernetes.api.model.VolumeBuilder() + .withName("scratch") + .withNewPersistentVolumeClaim() + .withClaimName(ScratchPvcDependent + .resourceName(hiveCluster)) + .endPersistentVolumeClaim() + .build()); + } + + List initContainers = new ArrayList<>(); + /* Cluster-wide external jars first, then the HiveServer2-specific ones; either list may be absent. */ List allJars = new ArrayList<>(); + if (spec.externalJars() != null) { + allJars.addAll(spec.externalJars()); + } + if (hs2.externalJars() != null) { + allJars.addAll(hs2.externalJars()); + } + addExternalJars(spec.image(), allJars, + initContainers, volumeMounts, volumes, envVars); + replaceConfMountWithSubPaths(volumeMounts, "hive-config", + "hive-site.xml", "tez-site.xml", "core-site.xml"); + + // Pre-compute config hash for the pod template annotation. + // This ensures the Deployment is created with the correct hash + // from the start (single ReplicaSet) and triggers rolling + // updates when ConfigMap content changes.
+ String configHash = sha256( + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec)), + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec)), + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + + Deployment deployment = new DeploymentBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .endMetadata() + .withNewSpec() + .withReplicas(hs2.replicas()) + .withNewSelector() + .withMatchLabels(selectorLabels) + .endSelector() + .withNewTemplate() + .withNewMetadata() + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .addToAnnotations("kubectl.kubernetes.io/default-container", "hiveserver2") + .addToAnnotations("hive.apache.org/config-hash", configHash) + .endMetadata() + .withNewSpec() + .withInitContainers(initContainers) + .addNewContainer() + .withName("hiveserver2") + .withImage(spec.image()) + .withImagePullPolicy(spec.imagePullPolicy()) + .withEnv(envVars) + .withPorts(ports) + .withReadinessProbe(readinessProbe) + .withLivenessProbe(livenessProbe) + .withResources(buildResources(hs2.resources())) + .withVolumeMounts(volumeMounts) + .endContainer() + .withVolumes(volumes) + .endSpec() + .endTemplate() + .endSpec() + .build(); + + /* Append user-supplied volumes and mounts after the build: cluster-wide entries first, then the HiveServer2 extras; mounts are added to container 0, the only container added above. */ if (spec.volumes() != null) { + deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); + } + if (spec.volumeMounts() != null) { + deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() + .addAll(spec.volumeMounts()); + } + if (hs2.extraVolumes() != null) { + deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(hs2.extraVolumes()); + } + if (hs2.extraVolumeMounts() != null) { + deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() + .addAll(hs2.extraVolumeMounts()); + } + + return deployment; + } + + /** Returns the Deployment
resource name for this HiveCluster. */ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-hiveserver2"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java new file mode 100644 index 000000000000..065524b17753 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the Kubernetes Service for HiveServer2 (Thrift and WebUI ports). */ +@KubernetesDependent( + labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator" +) +public class HiveServer2ServiceDependent + extends HiveDependentResource { + + public HiveServer2ServiceDependent() { + super(Service.class); + } + + /** Builds the desired Service: its type comes from hs2.serviceType(), it selects the HiveServer2 component pods, and it exposes the thrift and webui ports with targetPort equal to port. */ @Override + protected Service desired(HiveCluster hiveCluster, + Context context) { + HiveServer2Spec hs2 = hiveCluster.getSpec().hiveServer2(); + + return new ServiceBuilder() + .withNewMetadata() + /* Same "[cluster]-hiveserver2" string that HiveServer2DeploymentDependent.resourceName(...) returns; the suffix is duplicated here, so keep the two in sync. */ .withName(hiveCluster.getMetadata().getName() + "-hiveserver2") + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, + HiveServer2DeploymentDependent.COMPONENT)) + .endMetadata() + .withNewSpec() + .withType(hs2.serviceType()) + .withSelector(Labels.selectorForComponent(hiveCluster, + HiveServer2DeploymentDependent.COMPONENT)) + .addNewPort() + .withName("thrift") + .withPort(hs2.thriftPort()) + .withTargetPort(new IntOrString(hs2.thriftPort())) + .endPort() + .addNewPort() + .withName("webui") + .withPort(hs2.webUiPort()) + .withTargetPort(new IntOrString(hs2.webUiPort())) + .endPort() + .endSpec() + .build(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java new file mode 100644 index 000000000000..f945f0a75f23 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.Map; + +import io.fabric8.kubernetes.api.model.ConfigMap; +import io.fabric8.kubernetes.api.model.ConfigMapBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the llap-daemon-site.xml ConfigMap for LLAP daemons. 
*/ +@KubernetesDependent( + labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator" +) +public class LlapConfigMapDependent + extends HiveDependentResource { + + public static final String COMPONENT = "llap"; + + public LlapConfigMapDependent() { + super(ConfigMap.class); + } + + /** Builds the desired ConfigMap: a single "llap-daemon-site.xml" entry rendered by HadoopXmlBuilder from the properties returned by HiveConfigBuilder.getLlapDaemonSite for this cluster's spec. */ @Override + protected ConfigMap desired(HiveCluster hiveCluster, + Context context) { + Map props = + HiveConfigBuilder.getLlapDaemonSite(hiveCluster.getSpec()); + + return new ConfigMapBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .endMetadata() + .addToData("llap-daemon-site.xml", + HadoopXmlBuilder.buildXml(props)) + .build(); + } + + /** Returns the ConfigMap resource name for this HiveCluster: the cluster name followed by "-llap-config". */ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-llap-config"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java new file mode 100644 index 000000000000..30d5933a1b4f --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages the headless Kubernetes Service for LLAP daemons. + * Required by the StatefulSet for stable DNS entries and ZooKeeper registration. 
+ */ +@KubernetesDependent( + labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator" +) +public class LlapServiceDependent + extends HiveDependentResource { + + public LlapServiceDependent() { + super(Service.class); + } + + /** Builds the desired headless Service (clusterIP "None") that selects the LLAP component pods and lists the management (15004), shuffle (15551) and web (15002) ports. NOTE(review): LlapStatefulSetDependent also declares an "output" container port 15003 that is not listed here; confirm that is intentional. */ @Override + protected Service desired(HiveCluster hiveCluster, + Context context) { + return new ServiceBuilder() + .withNewMetadata() + /* Same "[cluster]-llap" string that LlapStatefulSetDependent passes to withServiceName(...); the suffix is duplicated in both classes, so keep them in sync. */ .withName(hiveCluster.getMetadata().getName() + "-llap") + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, + LlapStatefulSetDependent.COMPONENT)) + .endMetadata() + .withNewSpec() + .withClusterIP("None") + .withSelector(Labels.selectorForComponent(hiveCluster, + LlapStatefulSetDependent.COMPONENT)) + .addNewPort() + .withName("management") + .withPort(15004) + .withTargetPort(new IntOrString(15004)) + .endPort() + .addNewPort() + .withName("shuffle") + .withPort(15551) + .withTargetPort(new IntOrString(15551)) + .endPort() + .addNewPort() + .withName("web") + .withPort(15002) + .withTargetPort(new IntOrString(15002)) + .endPort() + .endSpec() + .build(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java new file mode 100644 index 000000000000..d36b0ad0d4d1 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.ContainerPort; +import io.fabric8.kubernetes.api.model.ContainerPortBuilder; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Probe; +import io.fabric8.kubernetes.api.model.apps.StatefulSet; +import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages the Kubernetes StatefulSet for LLAP daemons. + * Uses StatefulSet for stable pod identities required by ZooKeeper registration. 
+ */ +@KubernetesDependent( + labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator" +) +public class LlapStatefulSetDependent + extends HiveDependentResource { + + public static final String COMPONENT = "llap"; + + public LlapStatefulSetDependent() { + super(StatefulSet.class); + } + + /** Builds the desired LLAP StatefulSet from the HiveCluster spec: container env vars, ports, a readiness probe (no liveness probe is set), the projected config volume, the init containers filled in by addExternalJars, and a config-hash pod annotation. */ @Override + protected StatefulSet desired(HiveCluster hiveCluster, + Context context) { + HiveClusterSpec spec = hiveCluster.getSpec(); + LlapSpec llap = spec.llap(); + Map selectorLabels = + Labels.selectorForComponent(hiveCluster, COMPONENT); + + List envVars = new ArrayList<>(); + envVars.add(new EnvVar("SERVICE_NAME", "llap", null)); + envVars.add(new EnvVar("IS_RESUME", "true", null)); + envVars.add(new EnvVar("LLAP_MEMORY_MB", + String.valueOf(llap.memoryMb()), null)); + envVars.add(new EnvVar("LLAP_EXECUTORS", + String.valueOf(llap.executors()), null)); + envVars.add(new EnvVar("HIVE_ZOOKEEPER_QUORUM", + spec.zookeeper().quorum(), null)); + envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS", + llap.serviceHosts(), null)); + + // User-provided env vars (storage credentials, etc.)
+ if (spec.envVars() != null) { + envVars.addAll(spec.envVars()); + } + + List ports = List.of( + new ContainerPortBuilder() + .withName("management").withContainerPort(15004).build(), + new ContainerPortBuilder() + .withName("shuffle").withContainerPort(15551).build(), + new ContainerPortBuilder() + .withName("web").withContainerPort(15002).build(), + new ContainerPortBuilder() + .withName("output").withContainerPort(15003).build() + ); + + /* Readiness is probed on the management port (15004). */ Probe readinessProbe = buildTcpProbe(15004, llap.readinessProbe(), 15, 10, 3); + + /* Same "[cluster]-llap" string LlapServiceDependent uses as the headless Service name; it is passed to withServiceName(...) below. */ String headlessServiceName = + hiveCluster.getMetadata().getName() + "-llap"; + + List volumeMounts = + new ArrayList<>(); + volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder() + .withName("llap-config") + .withMountPath(CONF_MOUNT_PATH).build()); + + List volumes = + new ArrayList<>(); + volumes.add(buildProjectedConfigVolume("llap-config", + LlapConfigMapDependent.resourceName(hiveCluster), + HadoopConfigMapDependent.resourceName(hiveCluster))); + + List initContainers = new ArrayList<>(); + /* NOTE(review): spec.externalJars() is passed without the null check the HiveServer2 and Metastore dependents apply; confirm addExternalJars tolerates null. */ addExternalJars(spec.image(), spec.externalJars(), + initContainers, volumeMounts, volumes, envVars); + replaceConfMountWithSubPaths(volumeMounts, "llap-config", + "llap-daemon-site.xml", "core-site.xml"); + + // Pre-compute config hash for the pod template annotation.
+ String configHash = sha256( + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getLlapDaemonSite(spec)), + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + + StatefulSet statefulSet = new StatefulSetBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .endMetadata() + .withNewSpec() + .withReplicas(llap.replicas()) + .withServiceName(headlessServiceName) + .withNewSelector() + .withMatchLabels(selectorLabels) + .endSelector() + .withNewTemplate() + .withNewMetadata() + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .addToAnnotations("kubectl.kubernetes.io/default-container", "llap") + .addToAnnotations("hive.apache.org/config-hash", configHash) + .endMetadata() + .withNewSpec() + .withInitContainers(initContainers) + .addNewContainer() + .withName("llap") + .withImage(spec.image()) + .withImagePullPolicy(spec.imagePullPolicy()) + .withEnv(envVars) + .withPorts(ports) + .withReadinessProbe(readinessProbe) + .withResources(buildResources(llap.resources())) + .withVolumeMounts(volumeMounts) + .endContainer() + .withVolumes(volumes) + .endSpec() + .endTemplate() + .endSpec() + .build(); + + /* Append user-supplied volumes and mounts after the build: cluster-wide entries first, then the LLAP extras; mounts are added to container 0, the only container added above. */ if (spec.volumes() != null) { + statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); + } + if (spec.volumeMounts() != null) { + statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() + .addAll(spec.volumeMounts()); + } + if (llap.extraVolumes() != null) { + statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(llap.extraVolumes()); + } + if (llap.extraVolumeMounts() != null) { + statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() + .addAll(llap.extraVolumeMounts()); + } + return statefulSet; + } + + /** Returns the StatefulSet resource name for this HiveCluster.
*/ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-llap"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java new file mode 100644 index 000000000000..eab7dc5536e4 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.Map; + +import io.fabric8.kubernetes.api.model.ConfigMap; +import io.fabric8.kubernetes.api.model.ConfigMapBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the metastore-site.xml ConfigMap for the Hive Metastore. */ +@KubernetesDependent( + labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator" +) +public class MetastoreConfigMapDependent + extends HiveDependentResource { + + public static final String COMPONENT = "metastore"; + + public MetastoreConfigMapDependent() { + super(ConfigMap.class); + } + + /** Builds the desired ConfigMap: a single "metastore-site.xml" entry rendered by HadoopXmlBuilder from the properties returned by HiveConfigBuilder.getMetastoreSite for this cluster's spec. */ @Override + protected ConfigMap desired(HiveCluster hiveCluster, + Context context) { + Map props = + HiveConfigBuilder.getMetastoreSite(hiveCluster.getSpec()); + + return new ConfigMapBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .endMetadata() + .addToData("metastore-site.xml", HadoopXmlBuilder.buildXml(props)) + .build(); + } + + /** Returns the ConfigMap resource name for this HiveCluster.
*/ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-metastore-config"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java new file mode 100644 index 000000000000..f42091c58b5e --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.ContainerPort; +import io.fabric8.kubernetes.api.model.ContainerPortBuilder; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Probe; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; +import io.fabric8.kubernetes.api.model.apps.Deployment; +import io.fabric8.kubernetes.api.model.apps.DeploymentBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the Kubernetes Deployment for the Hive Metastore. 
*/ +@KubernetesDependent( + labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator" +) +public class MetastoreDeploymentDependent + extends HiveDependentResource { + + public static final String COMPONENT = "metastore"; + + public MetastoreDeploymentDependent() { + super(Deployment.class); + } + + /** Builds the desired Metastore Deployment from the HiveCluster spec: container env vars (including those from buildDbEnvVars), thrift (9083) and rest (9001) ports, readiness and liveness probes on 9083, the volumes from buildMetastoreVolumes, the init containers filled in by addExternalJars, and a config-hash pod annotation. */ @Override + protected Deployment desired(HiveCluster hiveCluster, + Context context) { + HiveClusterSpec spec = hiveCluster.getSpec(); + DatabaseConfig db = spec.metastore().database(); + Map selectorLabels = + Labels.selectorForComponent(hiveCluster, COMPONENT); + + List envVars = new ArrayList<>(); + envVars.add(new EnvVar("SERVICE_NAME", "metastore", null)); + envVars.add(new EnvVar("IS_RESUME", "true", null)); + envVars.addAll(buildDbEnvVars(db)); + if (spec.envVars() != null) { + envVars.addAll(spec.envVars()); + } + + List ports = List.of( + new ContainerPortBuilder() + .withName("thrift").withContainerPort(9083).build(), + new ContainerPortBuilder() + .withName("rest").withContainerPort(9001).build() + ); + + Probe readinessProbe = buildTcpProbe(9083, spec.metastore().readinessProbe(), 15, 10, 3); + Probe livenessProbe = buildTcpProbe(9083, spec.metastore().livenessProbe(), 60, 30, 5); + + List initContainers = new ArrayList<>(); + List volumeMounts = new ArrayList<>(); + List volumes = new ArrayList<>(); + buildMetastoreVolumes(hiveCluster, volumeMounts, volumes); + + // Merge JDBC driver JAR with global externalJars into one list + /* NOTE(review): db is dereferenced here without a null check; presumably spec.metastore().database() is always set when this dependent runs. Confirm. */ List allJars = new ArrayList<>(); + if (db.driverJarUrl() != null) { + allJars.add(db.driverJarUrl()); + } + if (spec.externalJars() != null) { + allJars.addAll(spec.externalJars()); + } + addExternalJars(spec.image(), allJars, + initContainers, volumeMounts, volumes, envVars); + // Replace directory mount with subPath mounts to avoid + // broken symlinks from K8s ConfigMap rotation.
+ replaceConfMountWithSubPaths(volumeMounts, "hive-config", + "metastore-site.xml", "core-site.xml"); + + // Pre-compute config hash for the pod template annotation. + // This ensures the Deployment is created with the correct hash + // from the start (single ReplicaSet) and triggers rolling + // updates when ConfigMap content changes. + String configHash = sha256( + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getMetastoreSite(spec)), + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + + Deployment deployment = new DeploymentBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .endMetadata() + .withNewSpec() + .withReplicas(spec.metastore().replicas()) + .withNewSelector() + .withMatchLabels(selectorLabels) + .endSelector() + .withNewTemplate() + .withNewMetadata() + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .addToAnnotations("kubectl.kubernetes.io/default-container", "metastore") + .addToAnnotations("hive.apache.org/config-hash", configHash) + .endMetadata() + .withNewSpec() + .withInitContainers(initContainers) + .addNewContainer() + .withName("metastore") + .withImage(spec.image()) + .withImagePullPolicy(spec.imagePullPolicy()) + .withEnv(envVars) + .withPorts(ports) + .withReadinessProbe(readinessProbe) + .withLivenessProbe(livenessProbe) + .withResources(buildResources( + spec.metastore().resources())) + .withVolumeMounts(volumeMounts) + .endContainer() + .withVolumes(volumes) + .endSpec() + .endTemplate() + .endSpec() + .build(); + + /* Append user-supplied volumes and mounts after the build: cluster-wide entries first, then the metastore extras; mounts are added to container 0, the only container added above. */ if (spec.volumes() != null) { + deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); + } + if (spec.volumeMounts() != null) { + deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() + .addAll(spec.volumeMounts()); + } + if (spec.metastore().extraVolumes() != null) { +
deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.metastore().extraVolumes()); + } + if (spec.metastore().extraVolumeMounts() != null) { + deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() + .addAll(spec.metastore().extraVolumeMounts()); + } + return deployment; + } + + /** Returns the Deployment resource name for this HiveCluster: the cluster name followed by "-metastore". */ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-metastore"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java new file mode 100644 index 000000000000..cf0537897512 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.dependent;
+
+import io.fabric8.kubernetes.api.model.IntOrString;
+import io.fabric8.kubernetes.api.model.Service;
+import io.fabric8.kubernetes.api.model.ServiceBuilder;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+import org.apache.hive.kubernetes.operator.util.Labels;
+
+/**
+ * Manages the ClusterIP Service in front of the Hive Metastore pods.
+ * The Service publishes two named ports, {@code thrift} and {@code rest},
+ * and selects pods through the metastore component selector labels.
+ */
+@KubernetesDependent(
+    labelSelector = "app.kubernetes.io/component=metastore,"
+        + "app.kubernetes.io/managed-by=hive-kubernetes-operator"
+)
+public class MetastoreServiceDependent
+    extends HiveDependentResource<Service> {
+
+  /** Port published (and targeted on the pod) under the name {@code thrift}. */
+  private static final int THRIFT_PORT = 9083;
+  /** Port published (and targeted on the pod) under the name {@code rest}. */
+  private static final int REST_PORT = 9001;
+
+  public MetastoreServiceDependent() {
+    super(Service.class);
+  }
+
+  /**
+   * Builds the desired Metastore Service for the given cluster.
+   *
+   * @param hiveCluster the primary resource; supplies name, namespace and labels
+   * @param context the reconciliation context (not used here)
+   * @return a ClusterIP Service exposing the thrift and rest ports
+   */
+  @Override
+  protected Service desired(HiveCluster hiveCluster,
+      Context<HiveCluster> context) {
+    return new ServiceBuilder()
+        .withNewMetadata()
+          .withName(resourceName(hiveCluster))
+          .withNamespace(hiveCluster.getMetadata().getNamespace())
+          .withLabels(Labels.forComponent(hiveCluster,
+              MetastoreDeploymentDependent.COMPONENT))
+        .endMetadata()
+        .withNewSpec()
+          .withType("ClusterIP")
+          .withSelector(Labels.selectorForComponent(hiveCluster,
+              MetastoreDeploymentDependent.COMPONENT))
+          .addNewPort()
+            .withName("thrift")
+            .withPort(THRIFT_PORT)
+            .withTargetPort(new IntOrString(THRIFT_PORT))
+          .endPort()
+          .addNewPort()
+            .withName("rest")
+            .withPort(REST_PORT)
+            .withTargetPort(new IntOrString(REST_PORT))
+          .endPort()
+        .endSpec()
+        .build();
+  }
+
+  /** Returns the Service resource name for this HiveCluster. */
+  public static String resourceName(HiveCluster hiveCluster) {
+    return hiveCluster.getMetadata().getName() + "-metastore";
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java
new file mode 100644
index 000000000000..edc0d386f75a
--- /dev/null
+++ 
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.List; + +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; +import io.fabric8.kubernetes.api.model.batch.v1.Job; +import io.fabric8.kubernetes.api.model.batch.v1.JobBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig; +import org.apache.hive.kubernetes.operator.model.spec.SecretKeyRef; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages the Kubernetes Job that initializes or upgrades the Hive Metastore + * database schema using schematool. 
+ */
+@KubernetesDependent(
+    labelSelector = "app.kubernetes.io/component=schema-init,"
+        + "app.kubernetes.io/managed-by=hive-kubernetes-operator"
+)
+public class SchemaInitJobDependent
+    extends HiveDependentResource<Job> {
+
+  public static final String COMPONENT = "schema-init";
+
+  public SchemaInitJobDependent() {
+    super(Job.class);
+  }
+
+  /**
+   * Builds the desired schema-init Job for the given cluster.
+   * The Job runs a single {@code schema-init} container that executes the
+   * shell command produced by {@link #buildSchemaCommand}, with the
+   * metastore config volumes mounted and, when a driver JAR URL is set,
+   * the external-jar init container and volume added.
+   *
+   * @param hiveCluster the primary resource; supplies name, namespace, labels and spec
+   * @param context the reconciliation context (not used here)
+   * @return the Job with backoff limit 3 and restart policy {@code OnFailure}
+   */
+  @Override
+  protected Job desired(HiveCluster hiveCluster,
+      Context<HiveCluster> context) {
+    HiveClusterSpec spec = hiveCluster.getSpec();
+    DatabaseConfig db = spec.metastore().database();
+    boolean hasDriverJar = db.driverJarUrl() != null;
+
+    List<EnvVar> envVars = new ArrayList<>();
+    envVars.add(new EnvVar("SERVICE_NAME", "metastore", null));
+    envVars.add(new EnvVar("IS_RESUME", "false", null));
+    envVars.add(new EnvVar("HIVE_CUSTOM_CONF_DIR",
+        CONF_MOUNT_PATH, null));
+    envVars.addAll(buildDbEnvVars(db));
+
+    String schemaCommand = buildSchemaCommand(db, hasDriverJar);
+
+    List<Container> initContainers = new ArrayList<>();
+    List<VolumeMount> volumeMounts = new ArrayList<>();
+    List<Volume> volumes = new ArrayList<>();
+    buildMetastoreVolumes(hiveCluster, volumeMounts, volumes);
+
+    // Schema init needs the JDBC driver JAR
+    List<String> jars = new ArrayList<>();
+    if (hasDriverJar) {
+      jars.add(db.driverJarUrl());
+    }
+    addExternalJars(spec.image(), jars,
+        initContainers, volumeMounts, volumes, envVars);
+
+    return new JobBuilder()
+        .withNewMetadata()
+          .withName(resourceName(hiveCluster))
+          .withNamespace(hiveCluster.getMetadata().getNamespace())
+          .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
+        .endMetadata()
+        .withNewSpec()
+          .withBackoffLimit(3)
+          .withNewTemplate()
+            .withNewMetadata()
+              .withLabels(Labels.forComponent(
+                  hiveCluster, COMPONENT))
+            .endMetadata()
+            .withNewSpec()
+              .withRestartPolicy("OnFailure")
+              .withInitContainers(initContainers)
+              .addNewContainer()
+                .withName("schema-init")
+                .withImage(spec.image())
+                .withImagePullPolicy(spec.imagePullPolicy())
+                .withCommand("/bin/bash", "-c")
+                .withArgs(schemaCommand)
+                .withEnv(envVars)
+                .withVolumeMounts(volumeMounts)
+              .endContainer()
+              .withVolumes(volumes)
+            .endSpec()
+          .endTemplate()
+        .endSpec()
+        .build();
+  }
+
+  /**
+   * Assembles the {@code bash -c} script run by the schema-init container.
+   *
+   * This Job runs schematool directly (not via the entrypoint),
+   * so we must replicate the entrypoint's config setup:
+   *   1. Symlink custom config files into HIVE_CONF_DIR
+   *   2. Set HADOOP_CLIENT_OPTS to pass SERVICE_OPTS as JVM args
+   *   3. Copy JDBC driver jar if downloaded by init container
+   *
+   * @param db the metastore database settings (type and optional password reference)
+   * @param hasDriverJar whether a driver JAR URL is configured, i.e. whether the
+   *     copy step from the external-jars path must be included
+   * @return the complete shell command line
+   */
+  private String buildSchemaCommand(DatabaseConfig db, boolean hasDriverJar) {
+    SecretKeyRef passwordRef = db.passwordSecretRef();
+
+    StringBuilder cmd = new StringBuilder();
+    cmd.append("export HIVE_CONF_DIR=$HIVE_HOME/conf && ");
+    cmd.append("if [ -d \"${HIVE_CUSTOM_CONF_DIR:-}\" ]; then ");
+    cmd.append("find \"${HIVE_CUSTOM_CONF_DIR}\" -type f -exec ");
+    cmd.append("ln -sfn {} \"${HIVE_CONF_DIR}\"/ \\; ; ");
+    cmd.append("export HADOOP_CONF_DIR=$HIVE_CONF_DIR; fi && ");
+    cmd.append("export HADOOP_CLIENT_OPTS="
+        + "\"${HADOOP_CLIENT_OPTS:-} -Xmx1G ${SERVICE_OPTS:-}\" && ");
+    if (hasDriverJar) {
+      cmd.append("cp ").append(EXT_JARS_PATH)
+          .append("/*.jar $HIVE_HOME/lib/ && ");
+    }
+    cmd.append("$HIVE_HOME/bin/schematool -dbType ")
+        .append(db.type())
+        .append(" -initOrUpgradeSchema");
+    if (passwordRef != null) {
+      // NOTE(review): the shell expands $DBPASSWORD into schematool's
+      // argument list here; confirm that exposure is acceptable.
+      cmd.append(" -passWord \"$DBPASSWORD\"");
+    }
+    return cmd.toString();
+  }
+
+  /** Returns the Job resource name for this HiveCluster. */
+  public static String resourceName(HiveCluster hiveCluster) {
+    return hiveCluster.getMetadata().getName() + "-schema-init";
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java
new file mode 100644
index 000000000000..9b1a64af73db
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.dependent;
+
+import java.util.List;
+
+import io.fabric8.kubernetes.api.model.PersistentVolumeClaim;
+import io.fabric8.kubernetes.api.model.PersistentVolumeClaimBuilder;
+import io.fabric8.kubernetes.api.model.Quantity;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+import org.apache.hive.kubernetes.operator.model.spec.TezAmSpec;
+import org.apache.hive.kubernetes.operator.util.Labels;
+
+/**
+ * Manages the shared scratch PersistentVolumeClaim mounted by both
+ * HiveServer2 and TezAM at /opt/hive/scratch.
+ * <p>
+ * This mirrors the Docker Compose pattern where a named volume
+ * {@code scratch:/opt/hive/scratch} is shared between the hs2 and
+ * tezam containers so that the {@code dummy_path} written by HS2
+ * (for VALUES clause) is accessible by the TezAM.
+ * <p>
+ * The PVC uses ReadWriteMany access mode so both pods can mount it
+ * simultaneously.
+ */
+@KubernetesDependent(
+    labelSelector = "app.kubernetes.io/component=scratch,"
+        + "app.kubernetes.io/managed-by=hive-kubernetes-operator"
+)
+public class ScratchPvcDependent
+    extends HiveDependentResource<PersistentVolumeClaim> {
+
+  public static final String COMPONENT = "scratch";
+
+  public ScratchPvcDependent() {
+    super(PersistentVolumeClaim.class);
+  }
+
+  /**
+   * Builds the desired scratch PVC for the given cluster.
+   * The requested size and the optional storage class are read from the
+   * Tez AM section of the spec.
+   *
+   * @param hiveCluster the primary resource; supplies name, namespace, labels and spec
+   * @param context the reconciliation context (not used here)
+   * @return a ReadWriteMany PVC requesting {@code tezAm.scratchStorageSize()} of storage
+   */
+  @Override
+  protected PersistentVolumeClaim desired(HiveCluster hiveCluster,
+      Context<HiveCluster> context) {
+    TezAmSpec tezAm = hiveCluster.getSpec().tezAm();
+
+    return new PersistentVolumeClaimBuilder()
+        .withNewMetadata()
+          .withName(resourceName(hiveCluster))
+          .withNamespace(hiveCluster.getMetadata().getNamespace())
+          .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
+        .endMetadata()
+        .withNewSpec()
+          .withAccessModes(List.of("ReadWriteMany"))
+          // A null class name leaves the field unset, so the cluster's
+          // default storage class applies.
+          .withStorageClassName(tezAm.scratchStorageClassName())
+          .withNewResources()
+            .addToRequests("storage",
+                new Quantity(tezAm.scratchStorageSize()))
+          .endResources()
+        .endSpec()
+        .build();
+  }
+
+  /** Returns the PVC resource name for this HiveCluster. */
+  public static String resourceName(HiveCluster hiveCluster) {
+    return hiveCluster.getMetadata().getName() + "-scratch";
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java
new file mode 100644
index 000000000000..adbe40ce4717
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages the headless Kubernetes Service for Tez Application Master. + * Required by the StatefulSet for stable DNS entries so that + * HiveServer2 can resolve TezAM pod hostnames for RPC communication. 
+ */
+@KubernetesDependent(
+    labelSelector = "app.kubernetes.io/component=tezam,"
+        + "app.kubernetes.io/managed-by=hive-kubernetes-operator"
+)
+public class TezAmServiceDependent
+    extends HiveDependentResource<Service> {
+
+  public TezAmServiceDependent() {
+    super(Service.class);
+  }
+
+  /**
+   * Builds the desired headless Service ({@code clusterIP: None}) for the
+   * Tez AM pods. No ports are declared; the Service only carries the
+   * selector for the tezam component.
+   *
+   * @param hiveCluster the primary resource; supplies name, namespace and labels
+   * @param context the reconciliation context (not used here)
+   * @return the headless Service
+   */
+  @Override
+  protected Service desired(HiveCluster hiveCluster,
+      Context<HiveCluster> context) {
+    return new ServiceBuilder()
+        .withNewMetadata()
+          .withName(resourceName(hiveCluster))
+          .withNamespace(hiveCluster.getMetadata().getNamespace())
+          .withLabels(Labels.forComponent(hiveCluster,
+              TezAmStatefulSetDependent.COMPONENT))
+        .endMetadata()
+        .withNewSpec()
+          .withClusterIP("None")
+          .withSelector(Labels.selectorForComponent(hiveCluster,
+              TezAmStatefulSetDependent.COMPONENT))
+        .endSpec()
+        .build();
+  }
+
+  /** Returns the headless Service resource name for this HiveCluster. */
+  public static String resourceName(HiveCluster hiveCluster) {
+    return hiveCluster.getMetadata().getName() + "-tezam";
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java
new file mode 100644
index 000000000000..a037baae6050
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.apps.StatefulSet; +import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.TezAmSpec; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages the Kubernetes StatefulSet for the Tez Application Master. + * Uses StatefulSet (with a headless Service) so that each TezAM pod + * gets a stable, DNS-resolvable hostname. HiveServer2 discovers + * TezAM pods via ZooKeeper and connects over RPC using the hostname, + * so the hostname must be resolvable within the cluster. 
+ */
+@KubernetesDependent(
+    labelSelector = "app.kubernetes.io/component=tezam,"
+        + "app.kubernetes.io/managed-by=hive-kubernetes-operator"
+)
+public class TezAmStatefulSetDependent
+    extends HiveDependentResource<StatefulSet> {
+
+  public static final String COMPONENT = "tezam";
+  private static final String SCRATCH_MOUNT_PATH = "/opt/hive/scratch";
+
+  public TezAmStatefulSetDependent() {
+    super(StatefulSet.class);
+  }
+
+  /**
+   * Builds the desired Tez AM StatefulSet for the given cluster.
+   * The pod template runs a single {@code tezam} container with the
+   * projected config volume and the shared scratch PVC mounted, followed by
+   * any cluster-wide and Tez-AM-specific extra volumes and mounts from the spec.
+   *
+   * @param hiveCluster the primary resource; supplies name, namespace, labels and spec
+   * @param context the reconciliation context (not used here)
+   * @return the StatefulSet bound to the {@code <name>-tezam} headless Service
+   */
+  @Override
+  protected StatefulSet desired(HiveCluster hiveCluster,
+      Context<HiveCluster> context) {
+    HiveClusterSpec spec = hiveCluster.getSpec();
+    TezAmSpec tezAm = spec.tezAm();
+    Map<String, String> selectorLabels =
+        Labels.selectorForComponent(hiveCluster, COMPONENT);
+
+    List<EnvVar> envVars = new ArrayList<>();
+    envVars.add(new EnvVar("SERVICE_NAME", "tezam", null));
+    envVars.add(new EnvVar("IS_RESUME", "true", null));
+    envVars.add(new EnvVar("HIVE_ZOOKEEPER_QUORUM",
+        spec.zookeeper().quorum(), null));
+    envVars.add(new EnvVar("TEZ_FRAMEWORK_MODE",
+        "STANDALONE_ZOOKEEPER", null));
+
+    if (spec.llap().isEnabled()) {
+      envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS",
+          spec.llap().serviceHosts(), null));
+    }
+
+    // User-provided env vars (storage credentials, etc.)
+    if (spec.envVars() != null) {
+      envVars.addAll(spec.envVars());
+    }
+
+    // Same value TezAmServiceDependent uses as the Service name.
+    String headlessServiceName =
+        hiveCluster.getMetadata().getName() + "-tezam";
+
+    List<io.fabric8.kubernetes.api.model.VolumeMount> volumeMounts =
+        new ArrayList<>();
+    volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder()
+        .withName("hive-config")
+        .withMountPath(CONF_MOUNT_PATH).build());
+    volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder()
+        .withName("scratch")
+        .withMountPath(SCRATCH_MOUNT_PATH).build());
+
+    List<io.fabric8.kubernetes.api.model.Volume> volumes =
+        new ArrayList<>();
+    volumes.add(buildProjectedConfigVolume("hive-config",
+        HiveServer2ConfigMapDependent.resourceName(hiveCluster),
+        HadoopConfigMapDependent.resourceName(hiveCluster)));
+    volumes.add(new io.fabric8.kubernetes.api.model.VolumeBuilder()
+        .withName("scratch")
+        .withNewPersistentVolumeClaim()
+          .withClaimName(ScratchPvcDependent.resourceName(hiveCluster))
+        .endPersistentVolumeClaim()
+        .build());
+
+    List<Container> initContainers = new ArrayList<>();
+    addExternalJars(spec.image(), spec.externalJars(),
+        initContainers, volumeMounts, volumes, envVars);
+    replaceConfMountWithSubPaths(volumeMounts, "hive-config",
+        "hive-site.xml", "tez-site.xml", "core-site.xml");
+
+    // User-supplied volumes and mounts go last, cluster-wide entries before
+    // the Tez-AM-specific ones, after the operator-managed entries above.
+    appendIfPresent(volumes, spec.volumes());
+    appendIfPresent(volumeMounts, spec.volumeMounts());
+    appendIfPresent(volumes, tezAm.extraVolumes());
+    appendIfPresent(volumeMounts, tezAm.extraVolumeMounts());
+
+    // Pre-compute config hash for the pod template annotation.
+    // TezAM uses the same ConfigMaps as HS2 (hive-site.xml + tez-site.xml + core-site.xml).
+    String configHash = sha256(
+        HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec)),
+        HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec)),
+        HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec)));
+
+    return new StatefulSetBuilder()
+        .withNewMetadata()
+          .withName(resourceName(hiveCluster))
+          .withNamespace(hiveCluster.getMetadata().getNamespace())
+          .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
+        .endMetadata()
+        .withNewSpec()
+          .withReplicas(tezAm.replicas())
+          .withServiceName(headlessServiceName)
+          .withNewSelector()
+            .withMatchLabels(selectorLabels)
+          .endSelector()
+          .withNewTemplate()
+            .withNewMetadata()
+              .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
+              .addToAnnotations("kubectl.kubernetes.io/default-container", "tezam")
+              .addToAnnotations("hive.apache.org/config-hash", configHash)
+            .endMetadata()
+            .withNewSpec()
+              .withInitContainers(initContainers)
+              .addNewContainer()
+                .withName("tezam")
+                .withImage(spec.image())
+                .withImagePullPolicy(spec.imagePullPolicy())
+                .withEnv(envVars)
+                .withResources(buildResources(tezAm.resources()))
+                .withVolumeMounts(volumeMounts)
+              .endContainer()
+              .withVolumes(volumes)
+            .endSpec()
+          .endTemplate()
+        .endSpec()
+        .build();
+  }
+
+  /** Appends {@code extras} to {@code target}; a null {@code extras} is ignored. */
+  private static <T> void appendIfPresent(List<T> target,
+      java.util.Collection<? extends T> extras) {
+    if (extras != null) {
+      target.addAll(extras);
+    }
+  }
+
+  /** Returns the StatefulSet resource name for this HiveCluster. */
+  public static String resourceName(HiveCluster hiveCluster) {
+    return hiveCluster.getMetadata().getName() + "-tezam";
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java
new file mode 100644
index 000000000000..a36002dbf886
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.dependent.condition;
+
+import io.fabric8.kubernetes.api.model.apps.Deployment;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource;
+import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+
+/**
+ * Precondition for HiveServer2 Deployment.
+ * If Metastore is external, proceed immediately.
+ * If managed, wait for Metastore pods to be ready.
+ */
+public class HiveServer2Precondition implements Condition<Deployment, HiveCluster> {
+
+  /**
+   * Decides whether HiveServer2 may be reconciled.
+   *
+   * @param dependentResource the dependent this condition is attached to (not used here)
+   * @param primary the HiveCluster being reconciled
+   * @param context used to look up the Deployments known for {@code primary}
+   * @return {@code true} when the metastore is disabled in the spec, or when the
+   *     Deployment named {@code <cluster>-metastore} reports at least the desired
+   *     number of ready replicas; {@code false} when that Deployment is not
+   *     found or has no status yet
+   */
+  @Override
+  public boolean isMet(
+      DependentResource<Deployment, HiveCluster> dependentResource,
+      HiveCluster primary,
+      Context<HiveCluster> context) {
+
+    if (!primary.getSpec().metastore().isEnabled()) {
+      return true;
+    }
+
+    int desiredReplicas = primary.getSpec().metastore().replicas();
+    String metastoreName = primary.getMetadata().getName() + "-metastore";
+    return context.getSecondaryResources(Deployment.class).stream()
+        .filter(d -> metastoreName.equals(d.getMetadata().getName()))
+        .findFirst()
+        .map(Deployment::getStatus)
+        .map(status -> {
+          // The API server omits readyReplicas when it is zero, so a missing
+          // value is read as 0 rather than as "not ready".
+          Integer ready = status.getReadyReplicas();
+          return (ready == null ? 0 : ready) >= desiredReplicas;
+        })
+        .orElse(false);
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java
new file mode 100644
index 000000000000..a113c50efbff
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.dependent.condition;
+
+import io.fabric8.kubernetes.api.model.HasMetadata;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource;
+import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+
+/**
+ * Activation condition for LLAP dependent resources.
+ * Returns true only when spec.llap.enabled is true.
+ */
+public class LlapEnabledCondition
+    implements Condition<HasMetadata, HiveCluster> {
+
+  /**
+   * Reports whether LLAP is switched on for the given cluster.
+   *
+   * @param dependentResource the dependent this condition is attached to (not used here)
+   * @param primary the HiveCluster whose spec is consulted
+   * @param context the reconciliation context (not used here)
+   * @return the value of {@code primary.getSpec().llap().isEnabled()}
+   */
+  @Override
+  public boolean isMet(
+      DependentResource<HasMetadata, HiveCluster> dependentResource,
+      HiveCluster primary,
+      Context<HiveCluster> context) {
+    return primary.getSpec().llap().isEnabled();
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java
new file mode 100644
index 000000000000..b1cb4139ac96
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.dependent.condition;
+
+import io.fabric8.kubernetes.api.model.HasMetadata;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource;
+import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+
+/**
+ * Activation condition for Metastore dependent resources.
+ * Returns true only when spec.metastore.enabled is true.
+ */
+public class MetastoreEnabledCondition implements Condition<HasMetadata, HiveCluster> {
+
+  /**
+   * Reports whether the operator-managed Metastore is switched on.
+   *
+   * @param dependentResource the dependent this condition is attached to (not used here)
+   * @param primary the HiveCluster whose spec is consulted
+   * @param context the reconciliation context (not used here)
+   * @return the value of {@code primary.getSpec().metastore().isEnabled()}
+   */
+  @Override
+  public boolean isMet(
+      DependentResource<HasMetadata, HiveCluster> dependentResource,
+      HiveCluster primary,
+      Context<HiveCluster> context) {
+    return primary.getSpec().metastore().isEnabled();
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java
new file mode 100644
index 000000000000..7b3169f32043
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.dependent.condition;
+
+import io.fabric8.kubernetes.api.model.apps.Deployment;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource;
+import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+
+/**
+ * Ready condition that checks whether the Metastore Deployment has the
+ * desired number of ready replicas. Used to gate HiveServer2 Deployment.
+ */
+public class MetastoreReadyCondition
+    implements Condition<Deployment, HiveCluster> {
+
+  /**
+   * Reports whether the Metastore Deployment is ready.
+   *
+   * @param dependentResource the Metastore Deployment dependent; queried for its
+   *     current secondary resource
+   * @param primary the HiveCluster being reconciled
+   * @param context the reconciliation context passed through to the lookup
+   * @return {@code true} when the metastore is disabled in the spec, or when the
+   *     Deployment reports at least {@code spec.metastore.replicas} ready replicas;
+   *     {@code false} when the Deployment is absent or has no status yet
+   */
+  @Override
+  public boolean isMet(
+      DependentResource<Deployment, HiveCluster> dependentResource,
+      HiveCluster primary,
+      Context<HiveCluster> context) {
+    if (!primary.getSpec().metastore().isEnabled()) {
+      return true;
+    }
+    int desiredReplicas = primary.getSpec().metastore().replicas();
+    return dependentResource.getSecondaryResource(primary, context)
+        .map(Deployment::getStatus)
+        .map(status -> {
+          // The API server omits readyReplicas when it is zero, so a missing
+          // value is read as 0 rather than as "not ready".
+          Integer ready = status.getReadyReplicas();
+          return (ready == null ? 0 : ready) >= desiredReplicas;
+        })
+        .orElse(false);
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java
new file mode 100644
index 000000000000..1b0b44318596
--- /dev/null
+++ 
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent.condition; + +import io.fabric8.kubernetes.api.model.batch.v1.Job; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Ready condition that checks whether the schema initialization Job + * has completed successfully. Used to gate Metastore Deployment creation. 
+ */
+public class SchemaJobCompletedCondition
+    implements Condition<Job, HiveCluster> {
+
+  /**
+   * Reports whether the schema initialization Job has succeeded.
+   *
+   * @param dependentResource the schema-init Job dependent; queried for its
+   *     current secondary resource
+   * @param primary the HiveCluster being reconciled
+   * @param context the reconciliation context passed through to the lookup
+   * @return {@code true} when the metastore is disabled in the spec, or when the
+   *     Job status reports a succeeded count of at least 1; {@code false} when the
+   *     Job is absent, has no status, or has no succeeded count
+   */
+  @Override
+  public boolean isMet(
+      DependentResource<Job, HiveCluster> dependentResource,
+      HiveCluster primary,
+      Context<HiveCluster> context) {
+    if (!primary.getSpec().metastore().isEnabled()) {
+      return true;
+    }
+    return dependentResource.getSecondaryResource(primary, context)
+        .map(Job::getStatus)
+        .map(status -> status.getSucceeded())
+        .map(succeeded -> succeeded >= 1)
+        .orElse(false);
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java
new file mode 100644
index 000000000000..85ae7e45dbdb
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hive.kubernetes.operator.dependent.condition; + +import io.fabric8.kubernetes.api.model.HasMetadata; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Activation condition for Tez AM dependent resources. + * Returns true only when spec.tezAm.enabled is true. + */ +public class TezAmEnabledCondition + implements Condition { + + @Override + public boolean isMet( + DependentResource dependentResource, + HiveCluster primary, + Context context) { + return primary.getSpec().tezAm().isEnabled(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveCluster.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveCluster.java new file mode 100644 index 000000000000..6a708e7c8c91 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveCluster.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.model; + +import io.fabric8.kubernetes.api.model.Namespaced; +import io.fabric8.kubernetes.client.CustomResource; +import io.fabric8.kubernetes.model.annotation.Group; +import io.fabric8.kubernetes.model.annotation.Kind; +import io.fabric8.kubernetes.model.annotation.ShortNames; +import io.fabric8.kubernetes.model.annotation.Version; + +/** + * HiveCluster is the root CRD type representing a complete Apache Hive deployment + * on Kubernetes. It manages Metastore, HiveServer2, LLAP daemons, and Tez AM. + */ +@Group("hive.apache.org") +@Version("v1alpha1") +@Kind("HiveCluster") +@ShortNames("hc") +public class HiveCluster + extends CustomResource + implements Namespaced { +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java new file mode 100644 index 000000000000..8d9e17049899 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.model; + +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.crd.generator.annotation.PreserveUnknownFields; +import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; +import org.apache.hive.kubernetes.operator.model.spec.HadoopSpec; +import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; +import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; +import org.apache.hive.kubernetes.operator.model.spec.MetastoreSpec; +import org.apache.hive.kubernetes.operator.model.spec.TezAmSpec; +import org.apache.hive.kubernetes.operator.model.spec.ZookeeperSpec; + +/** Full specification for a HiveCluster custom resource. */ +public record HiveClusterSpec( + @JsonPropertyDescription("Docker image to use for all Hive components") + String image, + @JsonPropertyDescription("Image pull policy: Always, Never, or IfNotPresent") + String imagePullPolicy, + @JsonPropertyDescription("Metastore component configuration") + MetastoreSpec metastore, + @JsonPropertyDescription("HiveServer2 component configuration") + HiveServer2Spec hiveServer2, + @JsonPropertyDescription("LLAP daemon configuration. Disabled by default.") + LlapSpec llap, + @JsonPropertyDescription("Tez Application Master configuration. 
Disabled by default.") + TezAmSpec tezAm, + @JsonPropertyDescription( + "External ZooKeeper connection details (not managed by this operator)") + ZookeeperSpec zookeeper, + @JsonPropertyDescription("Hadoop/core-site.xml configuration overrides") + HadoopSpec hadoop, + @JsonPropertyDescription( + "Environment variables injected into all component pods " + + "(e.g., storage credentials, custom JVM options)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List envVars, + @JsonPropertyDescription( + "External JARs (URLs) downloaded into all component pods and added to " + + "HADOOP_CLASSPATH (e.g., GCS connector, ABFS connector)") + List externalJars, + @JsonPropertyDescription( + "Volumes added to all component pods " + + "(e.g., Secrets containing keytabs or service account keys)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List volumes, + @JsonPropertyDescription( + "Volume mounts added to all component containers " + + "(e.g., mounting a GCS key file at /etc/gcs/key.json)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List volumeMounts) { + + public HiveClusterSpec { + image = image != null ? image : "apache/hive:4.3.0-SNAPSHOT"; + imagePullPolicy = imagePullPolicy != null ? imagePullPolicy : "IfNotPresent"; + metastore = metastore != null ? + metastore : + new MetastoreSpec(null, null, null, null, null, null, null, null, null, null, null); + hiveServer2 = hiveServer2 != null ? + hiveServer2 : + new HiveServer2Spec(null, null, null, null, null, null, null, null, null, null, null); + llap = llap != null ? llap : new LlapSpec(null, null, null, null, null, null, null, null, null, null); + tezAm = tezAm != null ? tezAm : new TezAmSpec(null, null, null, null, null, null, null, null); + zookeeper = zookeeper != null ? zookeeper : new ZookeeperSpec(null); + hadoop = hadoop != null ? hadoop : new HadoopSpec(null); + envVars = envVars != null ? envVars : List.of(); + externalJars = externalJars != null ? 
externalJars : List.of(); + volumes = volumes != null ? volumes : List.of(); + volumeMounts = volumeMounts != null ? volumeMounts : List.of(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterStatus.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterStatus.java new file mode 100644 index 000000000000..d2432dda2246 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterStatus.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model; + +import java.util.ArrayList; +import java.util.List; + +import io.fabric8.kubernetes.api.model.Condition; +import org.apache.hive.kubernetes.operator.model.status.ComponentStatus; + +/** Status subresource for the HiveCluster custom resource. 
*/ +public class HiveClusterStatus { + + private List conditions = new ArrayList<>(); + private ComponentStatus metastore; + private ComponentStatus hiveServer2; + private ComponentStatus llap; + private ComponentStatus tezAm; + private Long observedGeneration; + + public List getConditions() { + return conditions; + } + + public void setConditions(List conditions) { + this.conditions = conditions; + } + + public ComponentStatus getMetastore() { + return metastore; + } + + public void setMetastore(ComponentStatus metastore) { + this.metastore = metastore; + } + + public ComponentStatus getHiveServer2() { + return hiveServer2; + } + + public void setHiveServer2(ComponentStatus hiveServer2) { + this.hiveServer2 = hiveServer2; + } + + public ComponentStatus getLlap() { + return llap; + } + + public void setLlap(ComponentStatus llap) { + this.llap = llap; + } + + public ComponentStatus getTezAm() { + return tezAm; + } + + public void setTezAm(ComponentStatus tezAm) { + this.tezAm = tezAm; + } + + public Long getObservedGeneration() { + return observedGeneration; + } + + public void setObservedGeneration(Long observedGeneration) { + this.observedGeneration = observedGeneration; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + HiveClusterStatus that = (HiveClusterStatus) o; + return java.util.Objects.equals(observedGeneration, that.observedGeneration) && + java.util.Objects.equals(conditions, that.conditions) && + java.util.Objects.equals(metastore, that.metastore) && + java.util.Objects.equals(hiveServer2, that.hiveServer2) && + java.util.Objects.equals(llap, that.llap) && + java.util.Objects.equals(tezAm, that.tezAm); + } + + @Override + public int hashCode() { + return java.util.Objects.hash(conditions, metastore, hiveServer2, llap, tezAm, observedGeneration); + } +} diff --git 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/DatabaseConfig.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/DatabaseConfig.java new file mode 100644 index 000000000000..15181b0526c5 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/DatabaseConfig.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; + +/** JDBC database connection configuration for the Hive Metastore backend. */ +public record DatabaseConfig( + @JsonPropertyDescription("Database type: derby, mysql, postgres, mssql, or oracle") + String type, + @JsonPropertyDescription("JDBC connection URL") + String url, + @JsonPropertyDescription("JDBC driver class name") + String driver, + @JsonPropertyDescription("Database username") + String username, + @JsonPropertyDescription("Reference to a Kubernetes Secret containing the database password") + SecretKeyRef passwordSecretRef, + @JsonPropertyDescription( + "URL to download the JDBC driver jar, e.g. 
" + + "https://repo1.maven.org/maven2/org/postgresql/" + + "postgresql/42.7.5/postgresql-42.7.5.jar") + String driverJarUrl) { + + public DatabaseConfig { + if (type == null) { + type = "derby"; + } + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HadoopSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HadoopSpec.java new file mode 100644 index 000000000000..420dc66d4c54 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HadoopSpec.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; + +/** Hadoop configuration overrides, primarily for core-site.xml (filesystem settings). 
*/ +public record HadoopSpec( + @JsonPropertyDescription("Key-value pairs written into core-site.xml") + Map coreSiteOverrides) { +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java new file mode 100644 index 000000000000..6b888d42f6c9 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import java.util.List; +import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.crd.generator.annotation.PreserveUnknownFields; +import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; + +/** Configuration for the HiveServer2 component. 
*/ +public record HiveServer2Spec( + @JsonPropertyDescription("Number of replicas") + Integer replicas, + @JsonPropertyDescription("Resource requirements for pods") + ResourceRequirementsSpec resources, + @JsonPropertyDescription("Additional configuration overrides as key-value pairs") + Map configOverrides, + @JsonPropertyDescription("Additional volumes to attach to the pod (e.g., for keytabs or truststores)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumes, + @JsonPropertyDescription("Additional volume mounts for the container") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumeMounts, + @JsonPropertyDescription("Kubernetes Service type: ClusterIP, LoadBalancer, or NodePort") + String serviceType, + @JsonPropertyDescription("HiveServer2 Thrift port") + Integer thriftPort, + @JsonPropertyDescription("HiveServer2 Web UI port") + Integer webUiPort, + @JsonPropertyDescription("List of URIs to external JARs to download and add to HS2 classpath ") + List externalJars, + @JsonPropertyDescription("Readiness probe configuration") + ProbeSpec readinessProbe, + @JsonPropertyDescription("Liveness probe configuration") + ProbeSpec livenessProbe) { + + public HiveServer2Spec { + replicas = replicas != null ? replicas : 1; + serviceType = serviceType != null ? serviceType : "ClusterIP"; + thriftPort = thriftPort != null ? thriftPort : 10000; + webUiPort = webUiPort != null ? webUiPort : 10002; + extraVolumes = extraVolumes != null ? extraVolumes : List.of(); + extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); + externalJars = externalJars != null ? 
externalJars : List.of(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java new file mode 100644 index 000000000000..c9648c2f411f --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import java.util.List; +import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.crd.generator.annotation.PreserveUnknownFields; +import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; + +/** Configuration for LLAP (Live Long and Process) daemons. 
*/ +public record LlapSpec( + @JsonPropertyDescription("Number of replicas") + Integer replicas, + @JsonPropertyDescription("Resource requirements for pods") + ResourceRequirementsSpec resources, + @JsonPropertyDescription("Additional configuration overrides as key-value pairs") + Map configOverrides, + @JsonPropertyDescription("Additional volumes to attach to the pod (e.g., for keytabs or truststores)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumes, + @JsonPropertyDescription("Additional volume mounts for the container") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumeMounts, + @JsonPropertyDescription("Whether LLAP is enabled") + Boolean enabled, + @JsonPropertyDescription("Number of LLAP executors per daemon") + Integer executors, + @JsonPropertyDescription("Memory in MB per LLAP daemon instance") + Integer memoryMb, + @JsonPropertyDescription("LLAP service hosts identifier for ZooKeeper registration") + String serviceHosts, + @JsonPropertyDescription("Readiness probe configuration") + ProbeSpec readinessProbe) { + + public LlapSpec { + replicas = replicas != null ? replicas : 1; + enabled = enabled != null ? enabled : false; + executors = executors != null ? executors : 1; + memoryMb = memoryMb != null ? memoryMb : 2048; + serviceHosts = serviceHosts != null ? serviceHosts : "@llap0"; + extraVolumes = extraVolumes != null ? extraVolumes : List.of(); + extraVolumeMounts = extraVolumeMounts != null ? 
extraVolumeMounts : List.of(); + } + + public boolean isEnabled() { + return enabled; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java new file mode 100644 index 000000000000..684fbc9fe2e5 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import java.util.List; +import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.crd.generator.annotation.PreserveUnknownFields; +import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; + +/** Configuration for the Hive Metastore component. 
*/ +public record MetastoreSpec( + @JsonPropertyDescription("Number of replicas") + Integer replicas, + @JsonPropertyDescription("Resource requirements for pods") + ResourceRequirementsSpec resources, + @JsonPropertyDescription("Additional configuration overrides as key-value pairs") + Map configOverrides, + @JsonPropertyDescription("Additional volumes to attach to the pod (e.g., for keytabs or truststores)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumes, + @JsonPropertyDescription("Additional volume mounts for the container") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumeMounts, + @JsonPropertyDescription("Database connection configuration for the metastore backend") + DatabaseConfig database, + @JsonPropertyDescription("Warehouse directory path") + String warehouseDir, + @JsonPropertyDescription("Whether the operator should deploy and manage a Metastore") + Boolean enabled, + @JsonPropertyDescription("Thrift URI of the external Metastore (if enabled is false)") + String externalUri, + @JsonPropertyDescription("Readiness probe configuration") + ProbeSpec readinessProbe, + @JsonPropertyDescription("Liveness probe configuration") + ProbeSpec livenessProbe) { + + public MetastoreSpec { + replicas = replicas != null ? replicas : 1; + database = database != null ? database : new DatabaseConfig(null, null, null, null, null, null); + warehouseDir = warehouseDir != null ? warehouseDir : "/opt/hive/data/warehouse"; + enabled = enabled != null ? enabled : true; + extraVolumes = extraVolumes != null ? extraVolumes : List.of(); + extraVolumeMounts = extraVolumeMounts != null ? 
extraVolumeMounts : List.of(); + } + + public boolean isEnabled() { + return enabled; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ProbeSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ProbeSpec.java new file mode 100644 index 000000000000..7afab6a0277b --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ProbeSpec.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; + +/** Kubernetes probe (liveness/readiness) timing configurations. 
*/ +public record ProbeSpec( + @JsonPropertyDescription("Number of seconds after the container has started before probes are initiated.") + Integer initialDelaySeconds, + @JsonPropertyDescription("How often (in seconds) to perform the probe.") + Integer periodSeconds, + @JsonPropertyDescription("Number of seconds after which the probe times out.") + Integer timeoutSeconds, + @JsonPropertyDescription("Minimum consecutive failures for the probe to be considered failed after having succeeded.") + Integer failureThreshold, + @JsonPropertyDescription("Minimum consecutive successes for the probe to be considered successful after having failed.") + Integer successThreshold) { +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ResourceRequirementsSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ResourceRequirementsSpec.java new file mode 100644 index 000000000000..feff4afa1357 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ResourceRequirementsSpec.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; + +/** Kubernetes resource requirements specification for CPU and memory. */ +public record ResourceRequirementsSpec( + @JsonPropertyDescription("CPU request (e.g. 500m, 1)") + String requestsCpu, + @JsonPropertyDescription("Memory request (e.g. 1Gi, 512Mi)") + String requestsMemory, + @JsonPropertyDescription("CPU limit (e.g. 2, 1000m)") + String limitsCpu, + @JsonPropertyDescription("Memory limit (e.g. 2Gi, 1024Mi)") + String limitsMemory) { + + public ResourceRequirementsSpec { + if (requestsCpu == null) { + requestsCpu = "500m"; + } + if (requestsMemory == null) { + requestsMemory = "1Gi"; + } + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/SecretKeyRef.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/SecretKeyRef.java new file mode 100644 index 000000000000..3084552bd1e1 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/SecretKeyRef.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; + +/** Reference to a key within a Kubernetes Secret. */ +public record SecretKeyRef( + @JsonPropertyDescription("Name of the Kubernetes Secret") + String name, + @JsonPropertyDescription("Key within the Secret") + String key) { +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java new file mode 100644 index 000000000000..cbb41481c264 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import java.util.List; +import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.crd.generator.annotation.PreserveUnknownFields; +import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; + +/** Configuration for the Tez Application Master component. */ +public record TezAmSpec( + @JsonPropertyDescription("Number of replicas") + Integer replicas, + @JsonPropertyDescription("Resource requirements for pods") + ResourceRequirementsSpec resources, + @JsonPropertyDescription("Additional configuration overrides as key-value pairs") + Map configOverrides, + @JsonPropertyDescription("Additional volumes to attach to the pod (e.g., for keytabs or truststores)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumes, + @JsonPropertyDescription("Additional volume mounts for the container") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumeMounts, + @JsonPropertyDescription("Whether Tez AM is enabled") + Boolean enabled, + @JsonPropertyDescription("Storage size for the shared scratch PVC " + + "(ReadWriteMany) mounted on HS2 and TezAM at /opt/hive/scratch") + String scratchStorageSize, + @JsonPropertyDescription("StorageClass for the shared scratch PVC. " + + "Must support ReadWriteMany access. If null, uses cluster default.") + String scratchStorageClassName) { + + public TezAmSpec { + replicas = replicas != null ? replicas : 1; + enabled = enabled != null ? enabled : false; + scratchStorageSize = scratchStorageSize != null ? scratchStorageSize : "1Gi"; + extraVolumes = extraVolumes != null ? extraVolumes : List.of(); + extraVolumeMounts = extraVolumeMounts != null ? 
extraVolumeMounts : List.of(); + } + + public boolean isEnabled() { + return enabled; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ZookeeperSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ZookeeperSpec.java new file mode 100644 index 000000000000..3161c08b81c7 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ZookeeperSpec.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; + +/** External ZooKeeper connection configuration. ZooKeeper is not managed by this operator. 
*/ +public record ZookeeperSpec( + @JsonPropertyDescription("ZooKeeper quorum connection string") + String quorum) { + + public ZookeeperSpec { + if (quorum == null) { + quorum = "zookeeper:2181"; + } + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/ComponentStatus.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/ComponentStatus.java new file mode 100644 index 000000000000..155c46f3a714 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/ComponentStatus.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.status; + +/** + * Status of an individual Hive component (Metastore, HS2, LLAP, TezAM). 
+ */ +public class ComponentStatus { + + private int readyReplicas; + private int desiredReplicas; + private String phase; + + public int getReadyReplicas() { + return readyReplicas; + } + + public void setReadyReplicas(int readyReplicas) { + this.readyReplicas = readyReplicas; + } + + public int getDesiredReplicas() { + return desiredReplicas; + } + + public void setDesiredReplicas(int desiredReplicas) { + this.desiredReplicas = desiredReplicas; + } + + public String getPhase() { + return phase; + } + + public void setPhase(String phase) { + this.phase = phase; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ComponentStatus that = (ComponentStatus) o; + return readyReplicas == that.readyReplicas && desiredReplicas == that.desiredReplicas && java.util.Objects.equals( + phase, that.phase); + } + + @Override + public int hashCode() { + return java.util.Objects.hash(readyReplicas, desiredReplicas, phase); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java new file mode 100644 index 000000000000..c2319adc1f24 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.reconciler; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.function.Function; + +import io.fabric8.kubernetes.api.model.Condition; +import io.fabric8.kubernetes.api.model.HasMetadata; +import io.fabric8.kubernetes.api.model.apps.Deployment; +import io.fabric8.kubernetes.api.model.apps.StatefulSet; +import io.fabric8.kubernetes.api.model.batch.v1.Job; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.ControllerConfiguration; +import io.javaoperatorsdk.operator.api.reconciler.ErrorStatusHandler; +import io.javaoperatorsdk.operator.api.reconciler.ErrorStatusUpdateControl; +import io.javaoperatorsdk.operator.api.reconciler.Reconciler; +import io.javaoperatorsdk.operator.api.reconciler.UpdateControl; +import io.javaoperatorsdk.operator.api.reconciler.dependent.Dependent; +import org.apache.hive.kubernetes.operator.dependent.HadoopConfigMapDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveServer2ConfigMapDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveServer2DeploymentDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveServer2ServiceDependent; +import org.apache.hive.kubernetes.operator.dependent.LlapConfigMapDependent; +import org.apache.hive.kubernetes.operator.dependent.LlapServiceDependent; +import org.apache.hive.kubernetes.operator.dependent.LlapStatefulSetDependent; +import 
org.apache.hive.kubernetes.operator.dependent.MetastoreConfigMapDependent; +import org.apache.hive.kubernetes.operator.dependent.MetastoreDeploymentDependent; +import org.apache.hive.kubernetes.operator.dependent.MetastoreServiceDependent; +import org.apache.hive.kubernetes.operator.dependent.SchemaInitJobDependent; +import org.apache.hive.kubernetes.operator.dependent.ScratchPvcDependent; +import org.apache.hive.kubernetes.operator.dependent.TezAmServiceDependent; +import org.apache.hive.kubernetes.operator.dependent.TezAmStatefulSetDependent; +import org.apache.hive.kubernetes.operator.dependent.condition.HiveServer2Precondition; +import org.apache.hive.kubernetes.operator.dependent.condition.LlapEnabledCondition; +import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreEnabledCondition; +import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreReadyCondition; +import org.apache.hive.kubernetes.operator.dependent.condition.SchemaJobCompletedCondition; +import org.apache.hive.kubernetes.operator.dependent.condition.TezAmEnabledCondition; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterStatus; +import org.apache.hive.kubernetes.operator.model.status.ComponentStatus; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Main reconciler for the HiveCluster custom resource. + * Orchestrates all dependent resources with proper dependency ordering. 
+ */ +@ControllerConfiguration( + dependents = { + // --- ConfigMap dependents --- + @Dependent( + name = "hadoop-configmap", + type = HadoopConfigMapDependent.class + ), + @Dependent( + name = "metastore-configmap", + type = MetastoreConfigMapDependent.class, + activationCondition = MetastoreEnabledCondition.class + ), + @Dependent( + name = "hiveserver2-configmap", + type = HiveServer2ConfigMapDependent.class + ), + // --- Job dependents --- + @Dependent( + name = "schema-init-job", + type = SchemaInitJobDependent.class, + dependsOn = {"metastore-configmap", "hadoop-configmap"}, + readyPostcondition = SchemaJobCompletedCondition.class, + activationCondition = MetastoreEnabledCondition.class + ), + // --- Deployment dependents --- + @Dependent( + name = "metastore-deployment", + type = MetastoreDeploymentDependent.class, + dependsOn = {"schema-init-job"}, + readyPostcondition = MetastoreReadyCondition.class, + activationCondition = MetastoreEnabledCondition.class + ), + // --- Service dependents --- + @Dependent( + name = "metastore-service", + type = MetastoreServiceDependent.class, + dependsOn = {"metastore-configmap"}, + activationCondition = MetastoreEnabledCondition.class + ), + @Dependent( + name = "hiveserver2-deployment", + type = HiveServer2DeploymentDependent.class, + dependsOn = {"hiveserver2-configmap", + "hadoop-configmap"}, + reconcilePrecondition = HiveServer2Precondition.class + ), + @Dependent( + name = "hiveserver2-service", + type = HiveServer2ServiceDependent.class, + dependsOn = {"hiveserver2-configmap"} + ), + // --- LLAP (conditional) --- + @Dependent( + name = "llap-configmap", + type = LlapConfigMapDependent.class, + activationCondition = LlapEnabledCondition.class + ), + @Dependent( + name = "llap-statefulset", + type = LlapStatefulSetDependent.class, + dependsOn = {"llap-configmap", "hadoop-configmap"}, + activationCondition = LlapEnabledCondition.class + ), + @Dependent( + name = "llap-service", + type = LlapServiceDependent.class, + 
activationCondition = LlapEnabledCondition.class + ), + // --- TezAM (conditional) --- + @Dependent( + name = "scratch-pvc", + type = ScratchPvcDependent.class, + activationCondition = TezAmEnabledCondition.class + ), + @Dependent( + name = "tezam-service", + type = TezAmServiceDependent.class, + activationCondition = TezAmEnabledCondition.class + ), + @Dependent( + name = "tezam-statefulset", + type = TezAmStatefulSetDependent.class, + dependsOn = {"hiveserver2-configmap", "hadoop-configmap", + "tezam-service", "scratch-pvc"}, + activationCondition = TezAmEnabledCondition.class + ) + } +) +public class HiveClusterReconciler + implements Reconciler, ErrorStatusHandler { + + private static final Logger LOG = + LoggerFactory.getLogger(HiveClusterReconciler.class); + + @Override + public UpdateControl reconcile(HiveCluster resource, + Context context) { + LOG.info("Reconciling HiveCluster: {}/{}", + resource.getMetadata().getNamespace(), + resource.getMetadata().getName()); + + HiveClusterStatus existingStatus = resource.getStatus(); + HiveClusterStatus newStatus = buildStatus(resource, context, existingStatus); + + if (Objects.equals(existingStatus, newStatus)) { + return UpdateControl.noUpdate(); + } + + resource.setStatus(newStatus); + return UpdateControl.patchStatus(resource); + } + + @Override + public ErrorStatusUpdateControl updateErrorStatus( + HiveCluster resource, Context context, Exception e) { + LOG.error("Error reconciling HiveCluster: {}/{}", + resource.getMetadata().getNamespace(), + resource.getMetadata().getName(), e); + + HiveClusterStatus status = resource.getStatus() != null + ? resource.getStatus() : new HiveClusterStatus(); + + List existingConditions = status.getConditions() != null + ? 
status.getConditions() : Collections.emptyList(); + + status.setConditions(List.of( + buildCondition("Ready", "False", "ReconciliationError", + e.getMessage(), existingConditions) + )); + status.setObservedGeneration(resource.getMetadata().getGeneration()); + resource.setStatus(status); + + return ErrorStatusUpdateControl.patchStatus(resource); + } + + private HiveClusterStatus buildStatus(HiveCluster resource, + Context context, HiveClusterStatus existingStatus) { + + HiveClusterStatus status = new HiveClusterStatus(); + status.setObservedGeneration(resource.getMetadata().getGeneration()); + + List existingConditions = existingStatus != null && existingStatus.getConditions() != null + ? existingStatus.getConditions() : Collections.emptyList(); + List conditions = new ArrayList<>(); + + // Schema Init status + boolean schemaReady; + if (resource.getSpec().metastore().isEnabled()) { + schemaReady = context.getSecondaryResource(Job.class) + .map(j -> j.getStatus() != null && j.getStatus().getSucceeded() != null && j.getStatus().getSucceeded() >= 1) + .orElse(false); + } else { + schemaReady = true; + } + + conditions.add(buildCondition("SchemaInitialized", schemaReady ? "True" : "False", + schemaReady ? "JobCompleted" : "JobPending", + schemaReady ? "Schema initialized successfully" : "Schema initialization pending", + existingConditions)); + + // Metastore status + boolean metastoreReady; + if (resource.getSpec().metastore().isEnabled()) { + ComponentStatus metastoreStatus = + buildComponentStatus(context, Deployment.class, resource.getMetadata().getName() + "-metastore", + resource.getSpec().metastore().replicas(), + d -> d.getStatus() != null && d.getStatus().getReadyReplicas() != null ? 
+ d.getStatus().getReadyReplicas() : + 0); + status.setMetastore(metastoreStatus); + + metastoreReady = metastoreStatus.getReadyReplicas() >= metastoreStatus.getDesiredReplicas() + && metastoreStatus.getDesiredReplicas() > 0; + + conditions.add(buildCondition("MetastoreReady", metastoreReady ? "True" : "False", + metastoreReady ? "DeploymentReady" : "DeploymentNotReady", + metastoreReady ? "Metastore is ready" : "Metastore not yet ready", existingConditions)); + } else { + metastoreReady = true; + conditions.add(buildCondition("MetastoreReady", "True", "ExternalMetastore", "Using external Hive Metastore", + existingConditions)); + } + + // HiveServer2 status + ComponentStatus hs2Status = buildComponentStatus(context, Deployment.class, + resource.getMetadata().getName() + "-hiveserver2", + resource.getSpec().hiveServer2().replicas(), + d -> d.getStatus() != null && d.getStatus().getReadyReplicas() != null ? d.getStatus().getReadyReplicas() : 0); + status.setHiveServer2(hs2Status); + + boolean hs2Ready = + hs2Status.getReadyReplicas() >= hs2Status.getDesiredReplicas() && hs2Status.getDesiredReplicas() > 0; + conditions.add(buildCondition("HiveServer2Ready", hs2Ready ? "True" : "False", + hs2Ready ? "DeploymentReady" : "DeploymentNotReady", + hs2Ready ? "HiveServer2 is ready" : "HiveServer2 not yet ready", + existingConditions)); + + // LLAP status (optional) + if (resource.getSpec().llap().isEnabled()) { + status.setLlap(buildComponentStatus(context, StatefulSet.class, + resource.getMetadata().getName() + "-llap", + resource.getSpec().llap().replicas(), + s -> s.getStatus() != null && s.getStatus().getReadyReplicas() != null ? 
+ s.getStatus().getReadyReplicas() : 0)); + } + + // TezAM status (optional) + if (resource.getSpec().tezAm().isEnabled()) { + status.setTezAm(buildComponentStatus(context, StatefulSet.class, resource.getMetadata().getName() + "-tezam", + resource.getSpec().tezAm().replicas(), + s -> s.getStatus() != null && + s.getStatus().getReadyReplicas() != null ? s.getStatus().getReadyReplicas() : 0)); + } + + // Overall Ready condition + boolean allReady = schemaReady && metastoreReady && hs2Ready; + conditions.add(buildCondition("Ready", allReady ? "True" : "False", + allReady ? "AllComponentsReady" : "ComponentsNotReady", + allReady ? "All Hive components are ready" : "One or more components are not ready", + existingConditions)); + + status.setConditions(conditions); + return status; + } + + /** + * Unified helper to build status for Deployments, StatefulSets, or any HasMetadata type + * that tracks replicas. Filters by Kubernetes resource name from the informer cache. + */ + private ComponentStatus buildComponentStatus( + Context context, Class resourceClass, String expectedResourceName, + int desiredReplicas, Function readyExtractor) { + + ComponentStatus cs = new ComponentStatus(); + cs.setDesiredReplicas(desiredReplicas); + + int ready = context.getSecondaryResources(resourceClass).stream() + .filter(r -> r.getMetadata().getName().equals(expectedResourceName)) + .findFirst() + .map(readyExtractor) + .orElse(0); + + cs.setReadyReplicas(ready); + cs.setPhase(ready >= desiredReplicas && desiredReplicas > 0 ? 
"Running" : "Pending"); + return cs; + } + + private Condition buildCondition(String type, String conditionStatus, + String reason, String message, List existingConditions) { + + Condition condition = new Condition(); + condition.setType(type); + condition.setStatus(conditionStatus); + condition.setReason(reason); + condition.setMessage(message); + + // Preserve lastTransitionTime when the condition status has not changed + String preservedTime = existingConditions.stream() + .filter(c -> type.equals(c.getType()) && conditionStatus.equals(c.getStatus())) + .map(Condition::getLastTransitionTime) + .findFirst() + .orElse(null); + + condition.setLastTransitionTime(preservedTime != null ? preservedTime : Instant.now().toString()); + return condition; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HadoopXmlBuilder.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HadoopXmlBuilder.java new file mode 100644 index 000000000000..a7735beea976 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HadoopXmlBuilder.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Builds Hadoop-style XML configuration file content from a property map.
 * The output format matches standard Hadoop configuration files as used by
 * Hive, HDFS, and Tez.
 */
public final class HadoopXmlBuilder {

  private HadoopXmlBuilder() {
  }

  /**
   * Renders a property map as a Hadoop-style XML configuration string.
   *
   * <p>Properties are written in ascending key order, and both names and
   * values are XML-escaped. A {@code null} map yields an empty
   * {@code <configuration>} element; a {@code null} value yields an empty
   * {@code <value>} element.
   *
   * @param properties key-value pairs to include in the configuration
   * @return XML string in Hadoop configuration format
   */
  public static String buildXml(Map<String, String> properties) {
    StringBuilder sb = new StringBuilder();
    sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    sb.append("<?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>\n");
    sb.append("<configuration>\n");
    if (properties != null) {
      // Sort by key for deterministic XML output regardless of Map implementation
      for (Map.Entry<String, String> entry : new TreeMap<>(properties).entrySet()) {
        sb.append("  <property>\n");
        sb.append("    <name>").append(escapeXml(entry.getKey()))
            .append("</name>\n");
        sb.append("    <value>").append(escapeXml(entry.getValue()))
            .append("</value>\n");
        sb.append("  </property>\n");
      }
    }
    sb.append("</configuration>\n");
    return sb.toString();
  }

  /**
   * Escapes the five XML special characters.
   *
   * @param value raw text, may be {@code null}
   * @return escaped text, or an empty string for {@code null}
   */
  private static String escapeXml(String value) {
    if (value == null) {
      return "";
    }
    // '&' must be replaced first so the entities produced below are not re-escaped.
    return value
        .replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace("\"", "&quot;")
        .replace("'", "&apos;");
  }
}
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.util; + +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig; +import org.apache.hive.kubernetes.operator.model.spec.HadoopSpec; +import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; +import org.apache.hive.kubernetes.operator.model.spec.MetastoreSpec; + +/** + * Single source of truth for all Hive component configuration properties. + * Both ConfigMap dependents and Deployment/StatefulSet dependents call these + * methods, ensuring the config hash always matches the actual ConfigMap content. + */ +public final class HiveConfigBuilder { + + private HiveConfigBuilder() { + } + + /** Builds hive-site.xml properties for HiveServer2 and TezAM. */ + public static Map getHiveServer2HiveSite( + HiveCluster hiveCluster, HiveClusterSpec spec) { + Map props = new LinkedHashMap<>(); + boolean tezAmEnabled = spec.tezAm().isEnabled(); + String zkQuorum = spec.zookeeper().quorum(); + + String metastoreUri = spec.metastore().isEnabled() + ? 
"thrift://" + hiveCluster.getMetadata().getName() + "-metastore:9083" + : spec.metastore().externalUri(); + if (metastoreUri != null && !metastoreUri.isEmpty()) { + props.put("hive.metastore.uris", metastoreUri); + } + props.put("hive.metastore.warehouse.dir", spec.metastore().warehouseDir()); + props.put("hive.server2.enable.doAs", "false"); + props.put("hive.tez.exec.inplace.progress", "false"); + props.put("hive.tez.exec.print.summary", "true"); + props.put("hive.jar.directory", "/tmp"); + props.put("hive.user.install.directory", "/tmp"); + if (tezAmEnabled) { + props.put("hive.exec.local.scratchdir", "/opt/hive/scratch"); + } + + if (tezAmEnabled) { + props.put("hive.server2.tez.use.external.sessions", "true"); + props.put("hive.server2.tez.external.sessions.namespace", + "/tez-external-sessions/tez_am/server"); + props.put("hive.server2.tez.external.sessions.registry.class", + "org.apache.hadoop.hive.ql.exec.tez." + + "ZookeeperExternalSessionsRegistryClient"); + props.put("hive.zookeeper.quorum", zkQuorum); + props.put("tez.am.framework.mode", "STANDALONE_ZOOKEEPER"); + props.put("tez.am.registry.namespace", "/tez_am/server"); + props.put("tez.am.zookeeper.quorum", zkQuorum); + LlapSpec llap = spec.llap(); + if (llap.isEnabled()) { + props.put("hive.execution.mode", "llap"); + props.put("hive.llap.execution.mode", "all"); + props.put("hive.llap.daemon.service.hosts", llap.serviceHosts()); + } + } else { + props.put("hive.server2.tez.use.external.sessions", "false"); + props.put("tez.local.mode", "true"); + props.put("tez.am.framework.mode", "LOCAL"); + props.put("mapreduce.framework.name", "local"); + } + + if (spec.hiveServer2().configOverrides() != null) { + props.putAll(spec.hiveServer2().configOverrides()); + } + return props; + } + + /** Builds tez-site.xml properties for HiveServer2 and TezAM. 
*/ + public static Map getTezSite(HiveClusterSpec spec) { + boolean tezAmEnabled = spec.tezAm().isEnabled(); + String zkQuorum = spec.zookeeper().quorum(); + + Map tezProps = new LinkedHashMap<>(); + tezProps.put("tez.am.mode.session", "true"); + tezProps.put("tez.ignore.lib.uris", "true"); + tezProps.put("tez.am.tez-ui.webservice.enable", "false"); + tezProps.put("tez.am.disable.client-version-check", "true"); + tezProps.put("tez.session.am.dag.submit.timeout.secs", "-1"); + tezProps.put("tez.am.zookeeper.quorum", zkQuorum); + tezProps.put("hive.zookeeper.quorum", zkQuorum); + if (tezAmEnabled) { + tezProps.put("tez.local.mode", "false"); + tezProps.put("tez.am.framework.mode", "STANDALONE_ZOOKEEPER"); + tezProps.put("tez.am.registry.namespace", "/tez_am/server"); + } else { + tezProps.put("tez.local.mode", "true"); + } + + LlapSpec llap = spec.llap(); + if (llap.isEnabled()) { + tezProps.put("hive.llap.daemon.service.hosts", llap.serviceHosts()); + } + + if (spec.tezAm().configOverrides() != null) { + tezProps.putAll(spec.tezAm().configOverrides()); + } + return tezProps; + } + + /** Builds core-site.xml properties from hadoop.coreSiteOverrides. */ + public static Map getHadoopCoreSite(HiveClusterSpec spec) { + Map props = new LinkedHashMap<>(); + HadoopSpec hadoop = spec.hadoop(); + if (hadoop != null && hadoop.coreSiteOverrides() != null) { + props.putAll(hadoop.coreSiteOverrides()); + } + return props; + } + + /** Builds metastore-site.xml properties. 
*/ + public static Map getMetastoreSite(HiveClusterSpec spec) { + MetastoreSpec metastore = spec.metastore(); + Map props = new LinkedHashMap<>(); + + props.put("metastore.warehouse.dir", metastore.warehouseDir()); + + DatabaseConfig db = metastore.database(); + if (db != null) { + if (db.url() != null) { + props.put("javax.jdo.option.ConnectionURL", db.url()); + } + if (db.driver() != null) { + props.put("javax.jdo.option.ConnectionDriverName", db.driver()); + } + if (db.username() != null) { + props.put("javax.jdo.option.ConnectionUserName", db.username()); + } + } + + if (metastore.configOverrides() != null) { + props.putAll(metastore.configOverrides()); + } + return props; + } + + /** Builds llap-daemon-site.xml properties. */ + public static Map getLlapDaemonSite(HiveClusterSpec spec) { + LlapSpec llap = spec.llap(); + Map props = new LinkedHashMap<>(); + + props.put("hive.llap.daemon.memory.per.instance.mb", + String.valueOf(llap.memoryMb())); + props.put("hive.llap.daemon.num.executors", + String.valueOf(llap.executors())); + props.put("hive.llap.daemon.service.hosts", llap.serviceHosts()); + props.put("hive.zookeeper.quorum", spec.zookeeper().quorum()); + + if (llap.configOverrides() != null) { + props.putAll(llap.configOverrides()); + } + return props; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/Labels.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/Labels.java new file mode 100644 index 000000000000..dcf0cc43b3c6 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/Labels.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.util; + +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** Standard Kubernetes label and selector helpers following recommended label conventions. */ +public final class Labels { + + public static final String APP_NAME = "app.kubernetes.io/name"; + public static final String APP_INSTANCE = "app.kubernetes.io/instance"; + public static final String APP_COMPONENT = "app.kubernetes.io/component"; + public static final String MANAGED_BY = "app.kubernetes.io/managed-by"; + public static final String MANAGED_BY_VALUE = "hive-kubernetes-operator"; + + private Labels() { + } + + /** + * Returns the full set of labels for a component's Kubernetes resource. + * + * @param hc the HiveCluster resource + * @param component component name (metastore, hiveserver2, llap, tezam, schema-init) + * @return label map + */ + public static Map forComponent(HiveCluster hc, + String component) { + Map labels = new LinkedHashMap<>(); + labels.put(APP_NAME, "apache-hive"); + labels.put(APP_INSTANCE, hc.getMetadata().getName()); + labels.put(APP_COMPONENT, component); + labels.put(MANAGED_BY, MANAGED_BY_VALUE); + return labels; + } + + /** + * Returns the minimal selector labels for matching pods of a component. 
+ * + * @param hc the HiveCluster resource + * @param component component name + * @return selector map + */ + public static Map selectorForComponent(HiveCluster hc, + String component) { + Map selector = new LinkedHashMap<>(); + selector.put(APP_INSTANCE, hc.getMetadata().getName()); + selector.put(APP_COMPONENT, component); + return selector; + } +} diff --git a/packaging/src/kubernetes/src/resources/log4j2.xml b/packaging/src/kubernetes/src/resources/log4j2.xml new file mode 100644 index 000000000000..f906eb0fdf29 --- /dev/null +++ b/packaging/src/kubernetes/src/resources/log4j2.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + diff --git a/pom.xml b/pom.xml index 05507d09a4fe..9682ad8e5b89 100644 --- a/pom.xml +++ b/pom.xml @@ -99,6 +99,8 @@ 3.1.0 2.16.0 3.6.0 + 4.9.6 + 6.13.4 3.5.3 2.7.10 2.3.0 From b1af90184a1d97b6e99380508fbfad52061cf26f Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Wed, 20 May 2026 16:40:59 +0530 Subject: [PATCH 2/3] HELM Charts --- .../docker/storage/ozone/docker-compose.yml | 3 - packaging/src/kubernetes/README.md | 1453 ++++++++++------- .../config/samples/hivecluster-full-ha.yaml | 2 +- .../config/samples/hivecluster-minimal.yaml | 2 +- .../kubernetes/helm/hive-operator/Chart.yaml | 34 + .../crds}/hiveclusters.hive.apache.org-v1.yml | 0 .../helm/hive-operator/templates/_helpers.tpl | 52 + .../hive-operator/templates/clusterrole.yaml | 52 + .../templates/clusterrolebinding.yaml | 31 + .../hive-operator/templates/deployment.yaml | 46 + .../hive-operator/templates/hivecluster.yaml | 181 ++ .../templates/serviceaccount.yaml | 24 + .../kubernetes/helm/hive-operator/values.yaml | 158 ++ packaging/src/kubernetes/pom.xml | 19 +- .../dependent/HiveDependentResource.java | 22 + 15 files changed, 1459 insertions(+), 620 deletions(-) create mode 100644 packaging/src/kubernetes/helm/hive-operator/Chart.yaml rename packaging/src/kubernetes/{src/gen => helm/hive-operator/crds}/hiveclusters.hive.apache.org-v1.yml (100%) create mode 100644 
packaging/src/kubernetes/helm/hive-operator/templates/_helpers.tpl create mode 100644 packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml create mode 100644 packaging/src/kubernetes/helm/hive-operator/templates/clusterrolebinding.yaml create mode 100644 packaging/src/kubernetes/helm/hive-operator/templates/deployment.yaml create mode 100644 packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml create mode 100644 packaging/src/kubernetes/helm/hive-operator/templates/serviceaccount.yaml create mode 100644 packaging/src/kubernetes/helm/hive-operator/values.yaml diff --git a/packaging/src/docker/storage/ozone/docker-compose.yml b/packaging/src/docker/storage/ozone/docker-compose.yml index 18a16b6d3138..f5cf554b42c4 100644 --- a/packaging/src/docker/storage/ozone/docker-compose.yml +++ b/packaging/src/docker/storage/ozone/docker-compose.yml @@ -34,9 +34,6 @@ x-common-config: OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "1" OZONE-SITE.XML_hdds.scm.safemode.healthy.pipeline.pct: "0" OZONE-SITE.XML_ozone.s3g.domain.name: "s3.ozone" - OZONE-SITE.XML_hdds.datanode.volume.min.free.space: "128MB" - OZONE-SITE.XML_hdds.scm.safemode.pipeline.creation: "false" - OZONE-SITE.XML_ozone.scm.container.size: "256MB" services: datanode: diff --git a/packaging/src/kubernetes/README.md b/packaging/src/kubernetes/README.md index 6d5113a4a346..210cfa238e5e 100644 --- a/packaging/src/kubernetes/README.md +++ b/packaging/src/kubernetes/README.md @@ -26,835 +26,1064 @@ using a single `HiveCluster` custom resource. 
Built with - **Single CRD** (`HiveCluster`) manages all Hive components - **Four Hive services**: Metastore, HiveServer2, LLAP, and Tez AM +- **Helm chart** with sensible defaults — provide DB + ZK + storage, get a full-HA cluster - **Storage-agnostic**: works with any Hadoop-compatible filesystem (S3A, - ABFS, GCS, HDFS, Ozone) via `hadoop.coreSiteOverrides` and `envVars` -- **Automatic dependency ordering**: schema init before Metastore, Metastore - before HiveServer2, etc. -- **Optional components**: LLAP and Tez AM are enabled/disabled via spec flags -- **External Metastore**: skip deploying the Metastore and point HiveServer2 at - an existing external Hive Metastore + ABFS, GCS, HDFS, Ozone) +- **Automatic dependency ordering**: schema init -> Metastore -> HiveServer2 -> LLAP/TezAM +- **Optional components**: LLAP and Tez AM enabled/disabled via flags +- **External Metastore**: point HiveServer2 at an existing Metastore - **Status reporting**: per-component readiness tracked on the CRD status -## Architecture - -``` -HiveCluster CR - | - v -HiveClusterReconciler - | - +-- HadoopConfigMapDependent (core-site.xml with user-provided filesystem config) - +-- MetastoreConfigMapDependent (metastore-site.xml) - +-- HiveServer2ConfigMapDependent (hive-site.xml + tez-site.xml) - +-- SchemaInitJobDependent (schematool -initOrUpgradeSchema) - +-- MetastoreDeploymentDependent --> MetastoreServiceDependent - +-- HiveServer2DeploymentDependent --> HiveServer2ServiceDependent - +-- LlapStatefulSetDependent --> LlapServiceDependent (optional) - +-- ScratchPvcDependent (shared scratch PVC for HS2+TezAM, optional) - +-- TezAmStatefulSetDependent --> TezAmServiceDependent (optional) -``` - -**Startup order:** - -1. ConfigMaps (Hadoop, Metastore [if enabled], HiveServer2) -2. Schema Init Job (`schematool -initOrUpgradeSchema`) [if Metastore enabled] -3. Metastore Deployment + Service [if enabled] -4. HiveServer2 Deployment + Service -5. 
LLAP StatefulSet + Scratch PVC + Tez AM StatefulSet (if enabled) - -## Prerequisites - -- Kubernetes cluster (minikube, kind, EKS, GKE, etc.) -- `kubectl` configured to talk to the cluster -- Java 21+ and Maven 3.8+ (for building) -- A ZooKeeper instance accessible from the cluster -- A storage backend accessible from the cluster (S3A, ABFS, GCS, HDFS, or Ozone) +--- -## Build +## Build from Source ```bash +# Build the operator JAR + CRD + Helm chart (no Docker image) mvn clean package -pl packaging/src/kubernetes -DskipTests -``` -This produces: +# Build everything including the Docker image (includes the above) +mvn clean package -pl packaging/src/kubernetes -Pkubernetes -DskipTests +``` | Artifact | Path | |----------|------| | Shaded JAR | `target/hive-kubernetes-operator-*-shaded.jar` | -| CRD YAML (v1) | `src/gen/hiveclusters.hive.apache.org-v1.yml` | +| CRD YAML | `helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml` | +| Helm chart | `helm/hive-operator/` | +| Docker image | `apache/hive:operator-` | -The CRD YAML is auto-generated by fabric8 during compilation and copied to -`src/gen/` so it can be version-controlled. +--- -## Build the Operator Docker Image +## Quick Start (Helm) -Use the `-Pkubernetes` Maven profile to build the Docker image (the image is -tagged with the project version from the POM): +The Helm chart defaults to a **Full-HA** cluster (Metastore x2, HiveServer2 x2, +LLAP x2, TezAM x2). You only need to provide three things: database, ZooKeeper, +and storage. -```bash -mvn clean package -pl packaging/src/kubernetes -Pkubernetes -DskipTests -``` +### Prerequisites -This builds the jar **and** runs: -``` -docker build -t apache/hive:operator- . 
-``` +- Kubernetes 1.25+ +- Helm 3.x +- A ZooKeeper instance (or install one below) +- A storage backend (Ozone, S3, ABFS, GCS, HDFS) +- A supported RDBMS for the Metastore (or install one below) -Alternatively, build the image manually: +### Step 1: Install Dependencies ```bash -cd packaging/src/kubernetes -export HIVE_VERSION=4.3.0-SNAPSHOT -docker build -t apache/hive:operator-${HIVE_VERSION} . -``` +# ZooKeeper +helm repo add bitnami https://charts.bitnami.com/bitnami +helm install zookeeper bitnami/zookeeper \ + --set replicaCount=1 --set auth.enabled=false \ + --set image.repository=bitnamilegacy/zookeeper \ + --set image.tag=3.9.3-debian-12-r21 \ + --set global.security.allowInsecureImages=true --wait -For **minikube**, build inside the minikube Docker daemon: +# PostgreSQL +helm install postgres bitnami/postgresql \ + --set auth.username=hive --set auth.password=hive123 \ + --set auth.database=metastore --wait -```bash -eval $(minikube docker-env) -export HIVE_VERSION=4.3.0-SNAPSHOT -docker build -t apache/hive:operator-${HIVE_VERSION} . +# Create the DB password secret +kubectl create secret generic hive-db-secret --from-literal=password=hive123 ``` -For **kind**, load the image into the cluster: +If using **Ozone** as the storage backend: ```bash -export HIVE_VERSION=4.3.0-SNAPSHOT -docker build -t apache/hive:operator-${HIVE_VERSION} . -kind load docker-image apache/hive:operator-${HIVE_VERSION} +helm repo add ozone https://apache.github.io/ozone-helm-charts/ +helm install ozone ozone/ozone --version 0.2.0 --wait +sleep 50 +kubectl exec statefulset/ozone-om -- ozone sh volume create /s3v +kubectl exec statefulset/ozone-om -- ozone sh bucket create /s3v/hive ``` -## Install the CRD and Operator +### Step 2: Install the Hive Operator + Cluster -These steps are the same regardless of which deployment scenario you choose. +Choose your storage backend from the examples below. Each shows the CLI command +and an equivalent values file. -### 1. 
Install the CRD +--- -```bash -cd packaging/src/kubernetes -kubectl apply -f src/gen/hiveclusters.hive.apache.org-v1.yml -``` +## Storage Backend Examples -Verify: +Each example below shows both the `helm install` CLI command and the equivalent +`values.yaml` file. Use whichever approach you prefer. -```bash -kubectl get crd hiveclusters.hive.apache.org -``` +### Ozone (Full-HA, default behavior) -### 2. Deploy RBAC and the Operator +**CLI:** ```bash -kubectl create namespace hive-operator - -kubectl apply -f config/rbac/service-account.yaml -kubectl apply -f config/rbac/cluster-role.yaml -kubectl apply -f config/rbac/cluster-role-binding.yaml - -export HIVE_VERSION=4.3.0-SNAPSHOT -envsubst < config/operator/deployment.yaml | kubectl apply -f - +helm install hive ./helm/hive-operator \ + --set cluster.database.type=postgres \ + --set cluster.database.url="jdbc:postgresql://postgres-postgresql:5432/metastore" \ + --set cluster.database.driver="org.postgresql.Driver" \ + --set cluster.database.username=hive \ + --set cluster.database.passwordSecretRef.name=hive-db-secret \ + --set cluster.database.passwordSecretRef.key=password \ + --set cluster.database.driverJarUrl="https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive" \ + --set cluster.storage.coreSiteOverrides."fs\.s3a\.endpoint"="http://ozone-s3g-rest:9878" \ + --set-string cluster.storage.coreSiteOverrides."fs\.s3a\.path\.style\.access"=true \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].value=ozone' \ + --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].value=ozone' ``` -Verify the operator is running: +**Values file:** -```bash 
-kubectl -n hive-operator get pods -``` +```yaml +# values.yaml +cluster: + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" -### 3. Deploy ZooKeeper (if you don't have one) + zookeeper: + quorum: "zookeeper:2181" -ZooKeeper is required for Tez session management. Skip this if you already -have a ZooKeeper instance. + storage: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" +``` ```bash -helm repo add bitnami https://charts.bitnami.com/bitnami -helm install zookeeper bitnami/zookeeper \ - --set replicaCount=1 \ - --set auth.enabled=false \ - --set image.repository=bitnamilegacy/zookeeper \ - --set image.tag=3.9.3-debian-12-r21 \ - --set global.security.allowInsecureImages=true \ - --wait +helm install hive ./helm/hive-operator -f values.yaml ``` -This creates a Service named `zookeeper` on port `2181`. - --- -## Storage Setup - -The operator is **storage-agnostic** — it works with any Hadoop-compatible -filesystem. You provide the filesystem configuration via `spec.hadoop.coreSiteOverrides` -(for `core-site.xml` properties) and `spec.envVars` (for credentials injected -into all component pods). The operator does **not** deploy a storage cluster -itself. +### AWS S3 -### Using Apache Ozone (S3A via Helm Chart) - -Apache Ozone provides an S3-compatible object store. Use the official Helm -chart to deploy it alongside the operator. 
- -#### Step 1: Install Ozone via Helm +**CLI:** +Create the secret with your AWS credentials: ```bash -helm repo add ozone https://apache.github.io/ozone-helm-charts/ -helm install ozone ozone/ozone --version 0.2.0 --wait -``` - -For resource-constrained environments (e.g., CI, minikube), create a -`ozone-values.yaml`: - -```yaml -datanode: - replicas: 1 -env: -- name: OZONE-SITE.XML_hdds.datanode.volume.min.free.space - value: "256MB" -- name: OZONE-SITE.XML_hdds.scm.safemode.enabled - value: "false" -- name: OZONE-SITE.XML_ozone.scm.container.size - value: 128MB -- name: OZONE-SITE.XML_ozone.scm.block.size - value: 32MB -- name: OZONE-SITE.XML_ozone.server.default.replication - value: "1" +kubectl create secret generic aws-s3-creds \ + --from-literal=accessKey="<your-aws-access-key-id>" \ + --from-literal=secretKey="<your-aws-secret-access-key>" ``` -Then install with: +Then install the operator and HiveCluster with the appropriate storage config: ```bash -helm install ozone ozone/ozone --version 0.2.0 --values ozone-values.yaml --wait +helm install hive ./helm/hive-operator \ + --set cluster.database.type=postgres \ + --set cluster.database.url="jdbc:postgresql://postgres-postgresql:5432/metastore" \ + --set cluster.database.driver="org.postgresql.Driver" \ + --set cluster.database.username=hive \ + --set cluster.database.passwordSecretRef.name=hive-db-secret \ + --set cluster.database.passwordSecretRef.key=password \ + --set cluster.database.driverJarUrl="https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive-k8s-bucket" \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].valueFrom.secretKeyRef.name=aws-s3-creds' \ + --set 'cluster.storage.envVars[1].valueFrom.secretKeyRef.key=accessKey' \ + 
--set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].valueFrom.secretKeyRef.name=aws-s3-creds' \ + --set 'cluster.storage.envVars[2].valueFrom.secretKeyRef.key=secretKey' ``` -#### Step 2: Create the Ozone bucket +**Values file:** -```bash -kubectl exec statefulset/ozone-om -- ozone sh volume create /s3v -kubectl exec statefulset/ozone-om -- ozone sh bucket create /s3v/hive -``` +```yaml +# values.yaml +cluster: + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" -#### Step 3: Configure the HiveCluster CR + zookeeper: + quorum: "zookeeper:2181" -```yaml - hadoop: + storage: coreSiteOverrides: - fs.defaultFS: "s3a://hive" - fs.s3a.endpoint: "http://ozone-s3g-rest:9878" - fs.s3a.path.style.access: "true" - envVars: - - name: HADOOP_OPTIONAL_TOOLS - value: "hadoop-aws" - - name: AWS_ACCESS_KEY_ID - value: "ozone" - - name: AWS_SECRET_ACCESS_KEY - value: "ozone" + fs.defaultFS: "s3a://hive-k8s-bucket" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-s3-creds + key: accessKey + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-s3-creds + key: secretKey ``` -The Ozone Helm chart exposes the S3 Gateway as a Kubernetes Service named -`ozone-s3g-rest` on port `9878`. Default Ozone credentials are `ozone`/`ozone`. 
- -#### Teardown - ```bash -helm uninstall ozone +helm install hive ./helm/hive-operator -f values.yaml ``` -### Using MinIO (S3A) +--- -```yaml - hadoop: - coreSiteOverrides: - fs.defaultFS: "s3a://my-bucket" - fs.s3a.endpoint: "http://minio.minio-ns.svc:9000" - fs.s3a.path.style.access: "true" - envVars: - - name: HADOOP_OPTIONAL_TOOLS - value: "hadoop-aws" - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: minio-creds - key: accessKey - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: minio-creds - key: secretKey -``` - -### Using AWS S3 +### Google Cloud Storage (GCS) -```yaml - hadoop: - coreSiteOverrides: - fs.defaultFS: "s3a://my-bucket" - envVars: - - name: HADOOP_OPTIONAL_TOOLS - value: "hadoop-aws" - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: aws-s3-creds - key: accessKey - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: aws-s3-creds - key: secretKey -``` - -### Using Azure ABFS +Create the secret with your GCS service account key: -```yaml - hadoop: - coreSiteOverrides: - fs.defaultFS: "abfss://container@account.dfs.core.windows.net" - fs.azure.account.auth.type.account.dfs.core.windows.net: "SharedKey" - fs.azure.account.key.account.dfs.core.windows.net: "$(AZURE_STORAGE_KEY)" - envVars: - - name: HADOOP_OPTIONAL_TOOLS - value: "hadoop-azure" - - name: AZURE_STORAGE_KEY - valueFrom: - secretKeyRef: - name: azure-creds - key: storageKey +```bash +kubectl create secret generic gcs-creds --from-file=key.json=/path/to/sa-key.json ``` -### Using Google Cloud Storage (GCS) - -First create a Secret from your service account key: +**CLI:** ```bash -kubectl create secret generic gcs-creds --from-file=key.json=/path/to/sa-key.json +helm install hive ./helm/hive-operator \ + --set cluster.database.type=postgres \ + --set cluster.database.url="jdbc:postgresql://postgres-postgresql:5432/metastore" \ + --set cluster.database.driver="org.postgresql.Driver" \ + --set cluster.database.username=hive \ + --set 
cluster.database.passwordSecretRef.name=hive-db-secret \ + --set cluster.database.passwordSecretRef.key=password \ + --set cluster.database.driverJarUrl="https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set 'cluster.storage.coreSiteOverrides.fs\.defaultFS=gs://hive-bucket' \ + --set 'cluster.storage.coreSiteOverrides.fs\.gs\.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem' \ + --set 'cluster.storage.coreSiteOverrides.fs\.gs\.auth\.type=SERVICE_ACCOUNT_JSON_KEYFILE' \ + --set 'cluster.storage.coreSiteOverrides.fs\.gs\.auth\.service\.account\.json\.keyfile=/etc/gcs/key.json' \ + --set-string 'cluster.storage.coreSiteOverrides.fs\.gs\.reported\.permissions=777' \ + --set 'cluster.storage.externalJars[0]=https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.25/gcs-connector-hadoop3-2.2.25-shaded.jar' \ + --set 'cluster.storage.volumes[0].name=gcs-key' \ + --set 'cluster.storage.volumes[0].secret.secretName=gcs-creds' \ + --set 'cluster.storage.volumeMounts[0].name=gcs-key' \ + --set 'cluster.storage.volumeMounts[0].mountPath=/etc/gcs' \ + --set 'cluster.storage.volumeMounts[0].readOnly=true' ``` -Then configure the filesystem, download the GCS connector JAR, and mount the -key file into all pods: +**Values file:** ```yaml - hadoop: +# values.yaml +cluster: + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + + zookeeper: + quorum: "zookeeper:2181" + + storage: coreSiteOverrides: - fs.defaultFS: "gs://my-bucket" + fs.defaultFS: "gs://hive-bucket" fs.gs.impl: "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem" fs.gs.auth.type: "SERVICE_ACCOUNT_JSON_KEYFILE" 
fs.gs.auth.service.account.json.keyfile: "/etc/gcs/key.json" - externalJars: - - "https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.25/gcs-connector-hadoop3-2.2.25-shaded.jar" - volumes: - - name: gcs-key - secret: - secretName: gcs-creds - volumeMounts: - - name: gcs-key - mountPath: /etc/gcs - readOnly: true + fs.gs.reported.permissions: "777" + externalJars: + - "https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.25/gcs-connector-hadoop3-2.2.25-shaded.jar" + volumes: + - name: gcs-key + secret: + secretName: gcs-creds + volumeMounts: + - name: gcs-key + mountPath: /etc/gcs + readOnly: true ``` -### Using HDFS - -```yaml - hadoop: - coreSiteOverrides: - fs.defaultFS: "hdfs://namenode:8020" +```bash +helm install hive ./helm/hive-operator -f values.yaml ``` --- -## Deployment Scenarios - -### Scenario 1: Minimal Cluster (Metastore + HiveServer2) +## Deployment Modes -**Use this when:** you want a basic Hive cluster backed by external -storage and PostgreSQL. 
+### Minimal Cluster (no LLAP/TezAM) -#### Step 1: Deploy PostgreSQL (Bitnami Helm) +**CLI:** ```bash -helm repo add bitnami https://charts.bitnami.com/bitnami -helm install postgres bitnami/postgresql \ - --set auth.username=hive \ - --set auth.password=hive123 \ - --set auth.database=metastore \ - --wait +helm install hive ./helm/hive-operator \ + --set cluster.database.type=postgres \ + --set cluster.database.url="jdbc:postgresql://postgres-postgresql:5432/metastore" \ + --set cluster.database.driver="org.postgresql.Driver" \ + --set cluster.database.username=hive \ + --set cluster.database.passwordSecretRef.name=hive-db-secret \ + --set cluster.database.passwordSecretRef.key=password \ + --set cluster.database.driverJarUrl="https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive" \ + --set cluster.storage.coreSiteOverrides."fs\.s3a\.endpoint"="http://ozone-s3g-rest:9878" \ + --set-string cluster.storage.coreSiteOverrides."fs\.s3a\.path\.style\.access"=true \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].value=ozone' \ + --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].value=ozone' \ + --set cluster.metastore.replicas=1 \ + --set cluster.hiveServer2.replicas=1 \ + --set cluster.llap.enabled=false \ + --set cluster.tezAm.enabled=false ``` -This creates a Service named `postgres-postgresql` on port `5432`. The password -is also stored in a Secret named `postgres-postgresql` under key `password`. 
- -Create the Secret the operator will reference: +**Values file:** -```bash -kubectl create secret generic hive-db-secret \ - --from-literal=password=hive123 -``` +```yaml +# values.yaml +cluster: + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" -#### Step 2: Set up storage + zookeeper: + quorum: "zookeeper:2181" -Follow the [Storage Setup](#storage-setup) section above to deploy your -storage backend (e.g., Ozone via Helm). + storage: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" -#### Step 3: Create the HiveCluster + metastore: + replicas: 1 + hiveServer2: + replicas: 1 + llap: + enabled: false + tezAm: + enabled: false +``` ```bash -envsubst < config/samples/hivecluster-minimal.yaml | kubectl apply -f - +helm install hive ./helm/hive-operator -f values.yaml ``` -Or inline: +--- + +### External Metastore (skip Metastore deployment) + +**CLI:** ```bash -kubectl apply -f - <<'EOF' -apiVersion: hive.apache.org/v1alpha1 -kind: HiveCluster -metadata: - name: my-hive -spec: - image: apache/hive:${HIVE_VERSION} - imagePullPolicy: IfNotPresent +helm install hive ./helm/hive-operator \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set cluster.metastore.enabled=false \ + --set cluster.metastore.externalUri="thrift://my-external-metastore:9083" \ + --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive" \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 
'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].value=ozone' \ + --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].value=ozone' +``` - metastore: - replicas: 1 - database: - type: postgres - url: "jdbc:postgresql://postgres-postgresql:5432/metastore" - driver: "org.postgresql.Driver" - username: hive - passwordSecretRef: - name: hive-db-secret - key: password - driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" - warehouseDir: "/hive/warehouse" +**Values file:** - hiveServer2: - replicas: 1 - serviceType: ClusterIP +```yaml +# values.yaml +cluster: + database: {} # Not needed when metastore is external zookeeper: quorum: "zookeeper:2181" - hadoop: + metastore: + enabled: false + externalUri: "thrift://my-external-metastore:9083" + + storage: coreSiteOverrides: fs.defaultFS: "s3a://hive" - fs.s3a.endpoint: "http://ozone-s3g-rest:9878" - fs.s3a.path.style.access: "true" - envVars: - - name: HADOOP_OPTIONAL_TOOLS - value: "hadoop-aws" - - name: AWS_ACCESS_KEY_ID - value: "ozone" - - name: AWS_SECRET_ACCESS_KEY - value: "ozone" -EOF + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" ``` -#### What happens - -The operator creates: - -| Resource | Purpose | -|----------|---------| -| `my-hive-hadoop-config` ConfigMap | `core-site.xml` with filesystem configuration from `coreSiteOverrides` | -| `my-hive-metastore-config` ConfigMap | `metastore-site.xml` with warehouse dir and DB settings | -| `my-hive-hiveserver2-config` ConfigMap | `hive-site.xml` + `tez-site.xml` | -| `my-hive-schema-init` | Job that runs `schematool -initOrUpgradeSchema` | -| `my-hive-metastore` | Metastore Deployment + Service (port 9083) | -| `my-hive-hiveserver2` | HiveServer2 Deployment + Service (port 10000) | +```bash +helm install hive 
./helm/hive-operator -f values.yaml +``` --- -### Scenario 2: External RDBMS - -**Use this when:** you have an existing database instance (e.g. Amazon RDS, -Cloud SQL, a corporate database, or a different database engine like MySQL/Oracle). +### External Iceberg REST Catalog with Apache Polaris (AWS S3) -**What you provide:** the JDBC URL, credentials, and driver jar URL for your -existing database. +[Apache Polaris](https://polaris.apache.org/) is an Iceberg REST catalog with +built-in OAuth2. Requires **real AWS S3** (Polaris uses STS credential vending). +See `packaging/src/docker/thirdparties/polaris/` for the Docker Compose equivalent. -#### Step 1: Create the database password Secret +**Step 1: Create AWS secret and deploy Polaris** ```bash -kubectl create secret generic hive-db-secret \ - --from-literal=password= +kubectl create secret generic aws-s3-creds \ + --from-literal=accessKey="" \ + --from-literal=secretKey="" + +kubectl run polaris --image=apache/polaris:latest --port=8181 \ + --overrides='{ + "spec": { + "containers": [{ + "name": "polaris", + "image": "apache/polaris:latest", + "ports": [{"containerPort": 8181}], + "env": [ + {"name": "POLARIS_BOOTSTRAP_CREDENTIALS", "value": "POLARIS,iceberg-client,iceberg-client-secret"}, + {"name": "POLARIS_REALM_CONTEXT_REALMS", "value": "POLARIS"}, + {"name": "QUARKUS_OTEL_SDK_DISABLED", "value": "true"}, + {"name": "POLARIS_READINESS_IGNORE_SEVERE_ISSUES", "value": "true"}, + {"name": "AWS_REGION", "value": "ap-south-1"}, + {"name": "AWS_ACCESS_KEY_ID", "valueFrom": {"secretKeyRef": {"name": "aws-s3-creds", "key": "accessKey"}}}, + {"name": "AWS_SECRET_ACCESS_KEY", "valueFrom": {"secretKeyRef": {"name": "aws-s3-creds", "key": "secretKey"}}} + ] + }] + } + }' +kubectl expose pod polaris --port=8181 --name=polaris +kubectl wait --for=condition=Ready pod/polaris --timeout=120s ``` -#### Step 2: Create the HiveCluster - -Point the `database` section at your external RDBMS: +**Step 2: Bootstrap Polaris catalog** 
-```yaml -apiVersion: hive.apache.org/v1alpha1 -kind: HiveCluster -metadata: - name: my-hive -spec: - image: apache/hive:${HIVE_VERSION} +```bash +kubectl run polaris-init --rm -it --restart=Never --image=alpine/curl -- sh -c ' + apk add --no-cache jq > /dev/null 2>&1 + + # Wait for Polaris + until curl -sf http://polaris:8181/api/catalog/v1/oauth/tokens \ + --user "iceberg-client:iceberg-client-secret" \ + -H "Polaris-Realm: POLARIS" \ + -d grant_type=client_credentials -d scope=PRINCIPAL_ROLE:ALL > /dev/null 2>&1; do + sleep 2 + done + + TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ + --user "iceberg-client:iceberg-client-secret" \ + -H "Polaris-Realm: POLARIS" \ + -d grant_type=client_credentials -d scope=PRINCIPAL_ROLE:ALL | jq -r .access_token) + echo "Token: ${TOKEN:0:20}..." + + # Create catalog with S3 storage + curl -s -H "Authorization: Bearer $TOKEN" -H "Polaris-Realm: POLARIS" \ + -H "Content-Type: application/json" \ + http://polaris:8181/api/management/v1/catalogs \ + -d "{\"catalog\":{\"name\":\"ice01\",\"type\":\"INTERNAL\",\"readOnly\":false, + \"properties\":{\"default-base-location\":\"s3://<your-bucket>/warehouse\"}, + \"storageConfigInfo\":{\"storageType\":\"S3\", + \"roleArn\":\"arn:aws:iam::<account-id>:role/<role-name>\", + \"allowedLocations\":[\"s3://<your-bucket>/\"]}}}" + echo "" + echo "Polaris bootstrap complete." 
+' +``` - metastore: - replicas: 1 - database: - type: postgres - url: "jdbc:postgresql://my-rds-host.us-east-1.rds.amazonaws.com:5432/metastore" - driver: "org.postgresql.Driver" - username: hive_admin - passwordSecretRef: - name: hive-db-secret - key: password - driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" - warehouseDir: "/hive/warehouse" +**Step 3: Install Hive** - hiveServer2: - replicas: 1 - serviceType: ClusterIP +```bash +helm install hive ./helm/hive-operator \ + --set cluster.metastore.enabled=false \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].valueFrom.secretKeyRef.name=aws-s3-creds' \ + --set 'cluster.storage.envVars[1].valueFrom.secretKeyRef.key=accessKey' \ + --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].valueFrom.secretKeyRef.name=aws-s3-creds' \ + --set 'cluster.storage.envVars[2].valueFrom.secretKeyRef.key=secretKey' \ + --set 'cluster.hiveServer2.configOverrides.hive\.metastore\.warehouse\.dir=s3a:///warehouse' \ + --set 'cluster.hiveServer2.configOverrides.metastore\.catalog\.default=ice01' \ + --set 'cluster.hiveServer2.configOverrides.metastore\.client\.impl=org.apache.iceberg.hive.client.HiveRESTCatalogClient' \ + --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.uri=http://polaris:8181/api/catalog' \ + --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.type=rest' \ + --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.warehouse=ice01' \ + --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.rest\.auth\.type=oauth2' \ + --set 
'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.oauth2-server-uri=http://polaris:8181/api/catalog/v1/oauth/tokens' \ + --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.credential=iceberg-client:iceberg-client-secret' \ + --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.scope=PRINCIPAL_ROLE:ALL' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.scheduled\.queries\.executor\.enabled=false' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.materializedview\.rebuild\.incremental=false' \ + --set 'cluster.hiveServer2.configOverrides.hive\.metastore\.transactional\.event\.listeners=' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.notification\.event\.poll\.interval=0' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.autogather=false' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.fetch\.column\.stats=false' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.estimate=false' +``` - zookeeper: - quorum: "zookeeper:2181" +**Cleanup:** - hadoop: - coreSiteOverrides: - fs.defaultFS: "s3a://my-bucket" - envVars: - - name: HADOOP_OPTIONAL_TOOLS - value: "hadoop-aws" - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: s3-creds - key: accessKey - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: s3-creds - key: secretKey +```bash +helm uninstall hive +kubectl delete pod polaris +kubectl delete svc polaris +kubectl delete secret aws-s3-creds ``` -The `driverJarUrl` field tells the operator to add an init container that -downloads the JDBC driver JAR at pod startup. This works for any URL -(Maven Central, internal artifact repo, etc.). 
+--- -#### Supported databases +### External Iceberg REST Catalog with Apache Gravitino (Ozone Storage) -| Database | `type` | Example `url` | Example `driver` | -|----------|--------|---------------|------------------| -| PostgreSQL | `postgres` | `jdbc:postgresql://host:5432/metastore` | `org.postgresql.Driver` | -| MySQL | `mysql` | `jdbc:mysql://host:3306/metastore` | `com.mysql.cj.jdbc.Driver` | -| Oracle | `oracle` | `jdbc:oracle:thin:@host:1521/FREEPDB1` | `oracle.jdbc.OracleDriver` | -| Derby | `derby` | *(embedded, no URL needed)* | *(auto-detected)* | +[Apache Gravitino](https://gravitino.apache.org/) is an Iceberg REST catalog +that uses an external OAuth2 provider (Keycloak) for authentication. This +setup mirrors the working Docker Compose configuration in +`packaging/src/docker/thirdparties/gravitino/` but adapted for Kubernetes with +Ozone S3 storage. +**Step 1: Deploy Keycloak with the Hive realm** -### Scenario 3: External Hive Metastore +```bash +# Create Keycloak realm config (defines iceberg-client with service account) +kubectl create configmap keycloak-realm --from-file=realm-export.json=<(cat <<'EOF' +{ + "realm": "hive", + "enabled": true, + "clients": [ + { + "clientId": "iceberg-client", + "secret": "iceberg-client-secret", + "enabled": true, + "redirectUris": ["*"], + "serviceAccountsEnabled": true, + "protocol": "openid-connect", + "publicClient": false, + "directAccessGrantsEnabled": false, + "standardFlowEnabled": false, + "defaultClientScopes": ["catalog"], + "optionalClientScopes": [], + "protocolMappers": [ + { + "name": "audience", + "protocol": "openid-connect", + "protocolMapper": "oidc-audience-mapper", + "consentRequired": false, + "config": { + "included.client.audience": "hive-iceberg", + "id.token.claim": "false", + "access.token.claim": "true" + } + } + ], + "attributes": { + "access.token.lifespan": "3600" + } + } + ], + "clientScopes": [ + { + "name": "catalog", + "protocol": "openid-connect", + "attributes": {}, + 
"protocolMappers": [] + } + ] +} +EOF +) + +# Deploy Keycloak with the realm import +kubectl run keycloak --image=quay.io/keycloak/keycloak:25.0.1 --port=8080 \ + --overrides='{ + "spec": { + "containers": [{ + "name": "keycloak", + "image": "quay.io/keycloak/keycloak:25.0.1", + "args": ["start-dev", "--import-realm", "--health-enabled=true"], + "ports": [{"containerPort": 8080}], + "env": [ + {"name": "KEYCLOAK_ADMIN", "value": "admin"}, + {"name": "KEYCLOAK_ADMIN_PASSWORD", "value": "admin"} + ], + "volumeMounts": [{"name": "realm", "mountPath": "/opt/keycloak/data/import"}] + }], + "volumes": [{"name": "realm", "configMap": {"name": "keycloak-realm"}}] + } + }' +kubectl expose pod keycloak --port=8080 --name=keycloak +kubectl wait --for=condition=Ready pod/keycloak --timeout=180s +``` -**Use this when:** you already have an existing Hive Metastore running outside -the cluster (or managed separately) and only want the operator to deploy -HiveServer2 (and optionally LLAP / Tez AM). +**Step 2: Deploy Gravitino** -Set `spec.metastore.enabled: false` and provide the thrift URI of your external -Metastore via `spec.metastore.externalUri`. The operator will skip deploying the -Metastore Deployment, Service, schema-init Job, and metastore ConfigMap entirely. -HiveServer2 will connect directly to the external Metastore. 
+```bash +# Create Gravitino config (matches Docker thirdparties/gravitino setup, with s3a warehouse) +kubectl create configmap gravitino-conf --from-file=gravitino-iceberg-rest-server.conf=<(cat <<'EOF' +gravitino.iceberg-rest.httpPort = 9001 +gravitino.iceberg-rest.catalog-backend = jdbc +gravitino.iceberg-rest.uri = jdbc:h2:file:/tmp/gravitino_h2_db;AUTO_SERVER=TRUE +gravitino.iceberg-rest.jdbc-driver = org.h2.Driver +gravitino.iceberg-rest.jdbc-user = sa +gravitino.iceberg-rest.jdbc-password = +gravitino.iceberg-rest.jdbc-initialize = true +gravitino.iceberg-rest.warehouse = s3a://hive/warehouse +gravitino.authenticators = oauth +gravitino.authenticator.oauth.serverUri = http://keycloak:8080/realms/hive +gravitino.authenticator.oauth.tokenPath = /protocol/openid-connect/token +gravitino.authenticator.oauth.scope = openid catalog +gravitino.authenticator.oauth.clientId = iceberg-client +gravitino.authenticator.oauth.clientSecret = iceberg-client-secret +gravitino.authenticator.oauth.tokenValidatorClass = org.apache.gravitino.server.authentication.JwksTokenValidator +gravitino.authenticator.oauth.jwksUri = http://keycloak:8080/realms/hive/protocol/openid-connect/certs +gravitino.authenticator.oauth.provider = default +gravitino.authenticator.oauth.principalFields = sub +gravitino.authenticator.oauth.allowSkewSecs = 60 +gravitino.authenticator.oauth.serviceAudience = hive-iceberg +EOF +) + +# Deploy Gravitino Iceberg REST server +kubectl run gravitino --image=apache/gravitino-iceberg-rest:1.0.0 --port=9001 \ + --overrides='{ + "spec": { + "containers": [{ + "name": "gravitino", + "image": "apache/gravitino-iceberg-rest:1.0.0", + "command": ["/bin/bash", "-c"], + "args": ["cp /tmp/gravitino-conf/gravitino-iceberg-rest-server.conf /root/gravitino-iceberg-rest-server/conf/gravitino-iceberg-rest-server.conf && mkdir -p /root/gravitino-iceberg-rest-server/libs && curl -sL -o /root/gravitino-iceberg-rest-server/libs/h2-2.2.220.jar 
https://repo1.maven.org/maven2/com/h2database/h2/2.2.220/h2-2.2.220.jar && /bin/bash /root/gravitino-iceberg-rest-server/bin/iceberg-rest-server.sh start && tail -f /dev/null"], + "ports": [{"containerPort": 9001}], + "volumeMounts": [{"name": "conf", "mountPath": "/tmp/gravitino-conf"}] + }], + "volumes": [{"name": "conf", "configMap": {"name": "gravitino-conf"}}] + } + }' +kubectl expose pod gravitino --port=9001 --name=gravitino +kubectl wait --for=condition=Ready pod/gravitino --timeout=120s +``` +**Step 3: Install Hive with Gravitino as external catalog** -Like: +**CLI:** -```yaml -apiVersion: hive.apache.org/v1alpha1 -kind: HiveCluster -metadata: - name: my-hive -spec: - image: apache/hive:${HIVE_VERSION} - imagePullPolicy: IfNotPresent +```bash +helm install hive ./helm/hive-operator \ + --set cluster.metastore.enabled=false \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive" \ + --set cluster.storage.coreSiteOverrides."fs\.s3a\.endpoint"="http://ozone-s3g-rest:9878" \ + --set-string cluster.storage.coreSiteOverrides."fs\.s3a\.path\.style\.access"=true \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].value=ozone' \ + --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].value=ozone' \ + --set 'cluster.hiveServer2.configOverrides.metastore\.catalog\.default=ice01' \ + --set 'cluster.hiveServer2.configOverrides.metastore\.client\.impl=org.apache.iceberg.hive.client.HiveRESTCatalogClient' \ + --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.uri=http://gravitino:9001/iceberg' \ + --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.type=rest' \ + --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.rest\.auth\.type=oauth2' \ + 
--set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.oauth2-server-uri=http://keycloak:8080/realms/hive/protocol/openid-connect/token' \ + --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.credential=iceberg-client:iceberg-client-secret' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.scheduled\.queries\.executor\.enabled=false' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.materializedview\.rebuild\.incremental=false' \ + --set 'cluster.hiveServer2.configOverrides.hive\.metastore\.transactional\.event\.listeners=' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.notification\.event\.poll\.interval=0' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.autogather=false' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.fetch\.column\.stats=false' \ + --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.estimate=false' +``` + +**Values file:** +```yaml +# values-gravitino.yaml +cluster: metastore: enabled: false - externalUri: "thrift://host.docker.internal:9083" - - hiveServer2: - replicas: 2 - serviceType: ClusterIP - resources: - requestsMemory: "1Gi" - limitsMemory: "2Gi" - configOverrides: - hive.server2.enable.doAs: "false" - - llap: - enabled: true - replicas: 2 - executors: 1 - memoryMb: 1024 - serviceHosts: "@llap0" - resources: - requestsMemory: "2Gi" - limitsMemory: "3Gi" - - tezAm: - enabled: true - replicas: 2 zookeeper: quorum: "zookeeper:2181" - hadoop: + storage: coreSiteOverrides: fs.defaultFS: "s3a://hive" fs.s3a.endpoint: "http://ozone-s3g-rest:9878" fs.s3a.path.style.access: "true" - envVars: - - name: HADOOP_OPTIONAL_TOOLS - value: "hadoop-aws" - - name: AWS_ACCESS_KEY_ID - value: "ozone" - - name: AWS_SECRET_ACCESS_KEY - value: "ozone" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" + + hiveServer2: + configOverrides: + # 
Iceberg REST catalog connection + metastore.catalog.default: "ice01" + metastore.client.impl: "org.apache.iceberg.hive.client.HiveRESTCatalogClient" + iceberg.catalog.ice01.uri: "http://gravitino:9001/iceberg" + iceberg.catalog.ice01.type: "rest" + iceberg.catalog.ice01.rest.auth.type: "oauth2" + iceberg.catalog.ice01.oauth2-server-uri: "http://keycloak:8080/realms/hive/protocol/openid-connect/token" + iceberg.catalog.ice01.credential: "iceberg-client:iceberg-client-secret" + # Disable HMS-dependent features (not available with REST catalog) + hive.scheduled.queries.executor.enabled: "false" + hive.materializedview.rebuild.incremental: "false" + hive.metastore.transactional.event.listeners: "" + hive.notification.event.poll.interval: "0" + hive.stats.autogather: "false" + hive.stats.fetch.column.stats: "false" + hive.stats.estimate: "false" ``` -#### What happens +```bash +helm install hive ./helm/hive-operator -f values-gravitino.yaml +``` -When `metastore.enabled` is `false`: +**Test the connection:** -| Skipped | Reason | -|---------|--------| -| `my-hive-schema-init` Job | No managed DB to initialize | -| `my-hive-metastore` Deployment + Service | External Metastore handles this | -| `my-hive-metastore-config` ConfigMap | Not needed | +```bash +kubectl exec -it deployment/hive-hiveserver2 -- beeline -u "jdbc:hive2://localhost:10000/" \ + -e "CREATE TABLE test (id INT, name STRING); INSERT INTO test VALUES (1, 'hello'); SELECT * FROM test;" +``` + +**Cleanup:** -The operator still creates HiveServer2 (and optionally LLAP / Tez AM) with -`hive.metastore.uris` pointing at the external URI. The status reports -`MetastoreReady: True` with reason `ExternalMetastore`. 
+```bash +helm uninstall hive +kubectl delete pod gravitino keycloak +kubectl delete svc gravitino keycloak +kubectl delete configmap keycloak-realm gravitino-conf +``` --- -### Scenario 4: Full Cluster (LLAP + Tez AM) +### Custom Replicas and Resources -**Use this when:** you want all four Hive services running - Metastore, -HiveServer2, LLAP daemons, and a standalone Tez Application Master. +**Values file:** -When `tezAm.enabled` is `true`, HiveServer2 is configured to use external Tez -sessions via ZooKeeper (`hive.server2.tez.use.external.sessions=true`). The -standalone Tez AM registers itself in ZooKeeper and HiveServer2 discovers it -through the registry. +```yaml +# values.yaml +cluster: + # ... database, zookeeper, storage as above ... -When `tezAm.enabled` is `false` (the default), HiveServer2 runs Tez in local -mode (`tez.local.mode=true`), where the Tez DAG executes inside the HiveServer2 -JVM itself. + metastore: + replicas: 3 + resources: + requestsMemory: "1Gi" + limitsMemory: "2Gi" -#### Step 1: Deploy PostgreSQL and Storage + hiveServer2: + replicas: 4 + serviceType: LoadBalancer + resources: + requestsCpu: "1" + requestsMemory: "2Gi" + limitsMemory: "4Gi" -See [Scenario 1 Step 1](#step-1-deploy-postgresql-bitnami-helm) for PostgreSQL and -[Storage Setup](#storage-setup) for your storage backend. 
+ llap: + enabled: true + replicas: 3 + executors: 2 + memoryMb: 4096 + resources: + requestsMemory: "4Gi" + limitsMemory: "6Gi" -#### Step 2: Create the HiveCluster + tezAm: + replicas: 3 + scratchStorageSize: "5Gi" +``` ```bash -envsubst < config/samples/hivecluster-full-ha.yaml | kubectl apply -f - +helm install hive ./helm/hive-operator -f values.yaml ``` -#### What this creates - -With all components enabled, the operator creates approximately 12 resources on Non HA mode: - -| Category | Resources | -|----------|-----------| -| Hive | Schema-init Job (1), Metastore (1), HiveServer2 (2), LLAP (2), Tez AM (1), scratch PVC (1) | -| You deployed | PostgreSQL (1), ZooKeeper (1), storage backend | - --- -## Monitor +## Verify ```bash -# Watch pods come up in order kubectl get pods -w - -# Check HiveCluster status and conditions kubectl get hiveclusters -kubectl describe hivecluster my-hive - -# Operator logs -kubectl -n hive-operator logs -f deployment/hive-operator +kubectl describe hivecluster hive ``` ## Connect to HiveServer2 ```bash -# Port-forward the thrift port -kubectl port-forward svc/my-hive-hiveserver2 10000:10000 - -# Connect with Beeline -beeline -u "jdbc:hive2://my-hive-hiveserver2:10000/" - -# Connect to HiveServe2 UI -kubectl port-forward svc/my-hive-hiveserver2 10002:10002 - -Then use the URL `http://localhost:10002/` to access the UI. +kubectl exec -it deployment/hive-hiveserver2 -- beeline -u "jdbc:hive2://hive-hiveserver2:10000/" ``` -Or exec into the HiveServer2 pod directly: +Or via port-forward: ```bash -kubectl exec -it deployment/my-hive-hiveserver2 -- beeline -u "jdbc:hive2://my-hive-hiveserver2:10000/" +kubectl port-forward svc/hive-hiveserver2 10000:10000 +beeline -u "jdbc:hive2://localhost:10000/" ``` -If the HiveServer2 Service type is `LoadBalancer` or `NodePort`, use the -external address directly instead of port-forwarding. 
- - - -## CRD Reference +--- -### Top-level spec +## Helm Values Reference + +### Operator + +| Value | Default | Description | +|-------|---------|-------------| +| `operator.image.repository` | `apache/hive` | Operator image repository | +| `operator.image.tag` | `operator-4.3.0-SNAPSHOT` | Operator image tag | +| `operator.image.pullPolicy` | `IfNotPresent` | Image pull policy | +| `operator.resources` | `{requests: {cpu: 200m, memory: 256Mi}, limits: {memory: 512Mi}}` | Operator pod resources | + +### Cluster (HiveCluster CR) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.enabled` | `true` | Create a HiveCluster CR (set `false` to install only the operator) | +| `cluster.name` | `hive` | HiveCluster resource name | +| `cluster.image` | `apache/hive:4.3.0-SNAPSHOT` | Hive component image | + +### Database (Required) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.database.type` | `postgres` | DB type: `postgres`, `mysql`, `derby` | +| `cluster.database.url` | | JDBC URL | +| `cluster.database.driver` | | JDBC driver class | +| `cluster.database.username` | | DB username | +| `cluster.database.passwordSecretRef.name` | | K8s Secret name | +| `cluster.database.passwordSecretRef.key` | | Key in the Secret (e.g. `password`) | +| `cluster.database.driverJarUrl` | | URL to download JDBC driver | + +### ZooKeeper (Required) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.zookeeper.quorum` | | ZooKeeper connection string (e.g. `zookeeper:2181`) | + +### Storage (Required) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.storage.coreSiteOverrides` | `{}` | `core-site.xml` properties (`fs.defaultFS`, `fs.s3a.*`, etc.) 
| +| `cluster.storage.envVars` | `[]` | Env vars for all pods (credentials, `HADOOP_OPTIONAL_TOOLS`) | +| `cluster.storage.externalJars` | `[]` | Connector JAR URLs downloaded at startup | +| `cluster.storage.volumes` | `[]` | Volumes for all pods (credential files) | +| `cluster.storage.volumeMounts` | `[]` | Volume mounts for all containers | + +### Metastore + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.metastore.enabled` | `true` | Deploy a managed Metastore | +| `cluster.metastore.externalUri` | | Thrift URI when `enabled: false` | +| `cluster.metastore.replicas` | `2` | Replica count | +| `cluster.metastore.warehouseDir` | `/hive/warehouse` | Warehouse directory | +| `cluster.metastore.resources` | `{}` | CPU/memory | +| `cluster.metastore.configOverrides` | `{}` | Extra `metastore-site.xml` properties | + +### HiveServer2 + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.hiveServer2.replicas` | `2` | Replica count | +| `cluster.hiveServer2.serviceType` | `ClusterIP` | K8s Service type | +| `cluster.hiveServer2.thriftPort` | `10000` | Thrift port | +| `cluster.hiveServer2.webUiPort` | `10002` | Web UI port | +| `cluster.hiveServer2.resources` | `{}` | CPU/memory | +| `cluster.hiveServer2.configOverrides` | `{}` | Extra `hive-site.xml` properties | +| `cluster.hiveServer2.externalJars` | `[]` | HS2-specific JARs | + +### LLAP + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.llap.enabled` | `true` | Enable LLAP daemons | +| `cluster.llap.replicas` | `2` | Replica count | +| `cluster.llap.executors` | `1` | Executors per daemon | +| `cluster.llap.memoryMb` | `1024` | Memory per daemon (MB) | +| `cluster.llap.serviceHosts` | `@llap0` | LLAP ZK identity | +| `cluster.llap.resources` | `{}` | CPU/memory | + +### Tez AM + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.tezAm.enabled` | `true` | Enable Tez Application Master | 
+| `cluster.tezAm.replicas` | `2` | Replica count | +| `cluster.tezAm.scratchStorageSize` | `1Gi` | Shared scratch PVC size | +| `cluster.tezAm.scratchStorageClassName` | | StorageClass (must support RWX) | +| `cluster.tezAm.resources` | `{}` | CPU/memory | -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `spec.image` | string | | Hive Docker image | -| `spec.imagePullPolicy` | string | `IfNotPresent` | Image pull policy | -| `spec.envVars` | list | | Environment variables injected into all component pods (e.g., storage credentials). Supports both literal values and `valueFrom.secretKeyRef`. | -| `spec.externalJars` | list | | URLs of JARs downloaded into all pods at startup (e.g., GCS connector, ABFS connector). The Docker entrypoint automatically adds them to the classpath. | -| `spec.volumes` | list | | Volumes added to all component pods (e.g., Secrets containing keytabs or service account keys) | -| `spec.volumeMounts` | list | | Volume mounts added to all component containers (e.g., mounting a GCS key at `/etc/gcs/key.json`) | +--- -### Metastore (`spec.metastore`) +## Upgrade and Uninstall -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `enabled` | boolean | `true` | Whether the operator deploys and manages a Metastore. Set `false` to use an external Metastore. | -| `externalUri` | string | | Thrift URI of the external Metastore (required when `enabled` is `false`, e.g. `thrift://host:9083`) | -| `replicas` | int | `1` | Number of Metastore replicas (ignored when `enabled` is `false`) | -| `warehouseDir` | string | | Warehouse directory (e.g. 
`s3a://hive/warehouse`) | -| `configOverrides` | map | | Extra `metastore-site.xml` properties | -| `resources.*` | object | | CPU/memory requests and limits | +### Upgrade (values only, no CRD changes) -### Metastore Database (`spec.metastore.database`) +```bash +helm upgrade hive ./helm/hive-operator -f my-values.yaml +``` -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `type` | string | `derby` | DB type: `postgres`, `mysql`, `derby` | -| `url` | string | | JDBC connection URL | -| `driver` | string | | JDBC driver class name | -| `username` | string | | Database username | -| `passwordSecretRef.name` | string | | Kubernetes Secret name containing the password | -| `passwordSecretRef.key` | string | | Key within the Secret | -| `driverJarUrl` | string | | URL to download the JDBC driver JAR at pod startup | +### Upgrade (with CRD schema changes) -### HiveServer2 (`spec.hiveServer2`) +Helm does **not** update CRDs on `helm upgrade`. If the operator version +includes CRD changes (new status fields, new spec fields), you must +re-apply the CRD manually: -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `replicas` | int | `1` | Number of HiveServer2 replicas | -| `serviceType` | string | `ClusterIP` | Kubernetes Service type (`ClusterIP`, `NodePort`, `LoadBalancer`) | -| `thriftPort` | int | `10000` | Thrift port | -| `webUiPort` | int | `10002` | Web UI port | -| `configOverrides` | map | | Extra `hive-site.xml` properties | -| `resources.*` | object | | CPU/memory requests and limits | +```bash +kubectl apply -f helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml +helm upgrade hive ./helm/hive-operator -f my-values.yaml +``` -### LLAP (`spec.llap`) +### Full Uninstall and Reinstall (clean slate) -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `enabled` | boolean | `false` | Enable LLAP daemons | -| `replicas` | int | `1` | Number of LLAP 
daemon replicas | -| `executors` | int | `1` | Executors per daemon | -| `memoryMb` | int | `2048` | Memory per daemon (MB) | -| `serviceHosts` | string | | LLAP service hosts identifier (e.g. `@llap0`) | -| `resources.*` | object | | CPU/memory requests and limits | +```bash +# Uninstall (removes operator + HiveCluster CR + all managed pods) +helm uninstall hive -### Tez AM (`spec.tezAm`) +# IMPORTANT: Always delete the CRD before reinstalling to ensure +# the updated schema is applied. Helm only creates CRDs on install; +# it never updates existing ones. +kubectl delete crd hiveclusters.hive.apache.org -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `enabled` | boolean | `false` | Enable standalone Tez Application Master | -| `replicas` | int | `1` | Number of Tez AM replicas | -| `scratchStorageSize` | string | `1Gi` | Storage size for the shared scratch PVC (ReadWriteMany) mounted on HS2 and TezAM at `/opt/hive/scratch` | -| `scratchStorageClassName` | string | | StorageClass for the shared scratch PVC. Must support ReadWriteMany. If empty, uses cluster default. 
| -| `resources.*` | object | | CPU/memory requests and limits | +# Reinstall +helm install hive ./helm/hive-operator -f my-values.yaml +``` -### ZooKeeper (`spec.zookeeper`) +### Remove Everything (including dependencies) -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `quorum` | string | `zookeeper:2181` | ZooKeeper connection string | +```bash +helm uninstall hive +kubectl delete crd hiveclusters.hive.apache.org +helm uninstall ozone postgres zookeeper --ignore-not-found +kubectl delete pvc data-zookeeper-0 --ignore-not-found +kubectl delete pvc data-postgres-postgresql-0 --ignore-not-found +kubectl delete secret hive-db-secret --ignore-not-found +``` +--- -### Hadoop (`spec.hadoop`) +## Advanced: Deploy via Operator Only (without Helm) -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `coreSiteOverrides` | map | | `core-site.xml` properties for filesystem configuration (e.g., `fs.defaultFS`, `fs.s3a.*`, `fs.azure.*`, `fs.gs.*`) | +If you prefer raw manifests over Helm, you can deploy the operator and create +HiveCluster CRs manually. This example uses Ozone as the storage backend. -### Resource requirements +### 1. Install the CRD -All components support a `resources` object with these fields: +```bash +kubectl apply -f helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml +``` -| Field | Example | -|-------|---------| -| `resources.requestsCpu` | `"500m"` | -| `resources.requestsMemory` | `"1Gi"` | -| `resources.limitsCpu` | `"2"` | -| `resources.limitsMemory` | `"4Gi"` | +### 2. Deploy RBAC and the Operator -## How Storage Configuration Works +```bash +kubectl create namespace hive-operator +kubectl apply -f config/rbac/ +export HIVE_VERSION=4.3.0-SNAPSHOT +envsubst < config/operator/deployment.yaml | kubectl apply -f - +``` -The operator is storage-agnostic — it does not hardcode any filesystem-specific -logic. You configure storage using three mechanisms: +### 3. 
Deploy Ozone -1. **`spec.hadoop.coreSiteOverrides`** — Filesystem properties written to - `core-site.xml` (e.g., `fs.defaultFS`, `fs.s3a.endpoint`, `fs.azure.*`, - `fs.gs.*`). This ConfigMap is projected into all component pods. +```bash +helm repo add ozone https://apache.github.io/ozone-helm-charts/ +helm install ozone ozone/ozone --version 0.2.0 --wait +sleep 50 +kubectl exec statefulset/ozone-om -- ozone sh volume create /s3v +kubectl exec statefulset/ozone-om -- ozone sh bucket create /s3v/hive +``` -2. **`spec.envVars`** — Environment variables injected into all component - containers (Metastore, HiveServer2, LLAP, Tez AM). Use this for credentials - that should not appear in ConfigMaps, and for `HADOOP_OPTIONAL_TOOLS` which - tells the Hadoop classpath to include optional connector JARs (e.g., - `hadoop-aws` for S3A, `hadoop-azure` for ABFS). Supports both literal values - and `valueFrom.secretKeyRef` for Kubernetes Secrets. - -3. **`spec.externalJars`** — URLs of connector JARs (GCS, ABFS, etc.) - downloaded via an init container into `/tmp/ext-jars`. The Hive Docker - entrypoint automatically copies them to `$HIVE_HOME/lib/` at startup. - Required for filesystems whose implementation class is not bundled in the - Hive/Hadoop image. +### 4. Create a HiveCluster CR -4. **`spec.volumes` / `spec.volumeMounts`** — Volumes and mounts added to all - component pods. Use this to mount credential files (GCS service account JSON, - Kerberos keytabs, etc.) from Kubernetes Secrets into a known path. +Full-HA (Metastore x2, HS2 x2, LLAP x2, TezAM x2): -This design supports any Hadoop-compatible filesystem: S3A (AWS, MinIO, Ozone), -ABFS (Azure), GCS (Google Cloud), HDFS, or Ozone native. +```bash +envsubst < config/samples/hivecluster-full-ha.yaml | kubectl apply -f - +``` -### Overrides +Or minimal (Metastore x1, HS2 x1, no LLAP/TezAM): -Properties in `spec.hadoop.coreSiteOverrides` populate `core-site.xml`. 
-Properties in `spec.metastore.configOverrides` and -`spec.hiveServer2.configOverrides` override values in `metastore-site.xml` and -`hive-site.xml` respectively. +```bash +envsubst < config/samples/hivecluster-minimal.yaml | kubectl apply -f - +``` -## Cleanup +### 5. Cleanup ```bash -# Delete the HiveCluster (removes all managed pods, services, etc.) -kubectl delete hivecluster my-hive - -# Remove the operator +kubectl delete hivecluster hive envsubst < config/operator/deployment.yaml | kubectl delete -f - -kubectl delete -f config/rbac/cluster-role-binding.yaml -kubectl delete -f config/rbac/cluster-role.yaml -kubectl delete -f config/rbac/service-account.yaml +kubectl delete -f config/rbac/ kubectl delete namespace hive-operator - -# Remove the CRD +# Always delete CRD to ensure a clean reinstall picks up schema changes kubectl delete crd hiveclusters.hive.apache.org +kubectl delete pvc data-zookeeper-0 --ignore-not-found +kubectl delete pvc data-postgres-postgresql-0 --ignore-not-found +kubectl delete secret hive-db-secret --ignore-not-found +helm uninstall ozone postgres zookeeper --ignore-not-found ``` -Remove Everything -```bash -kubectl delete hivecluster my-hive --ignore-not-found -envsubst < config/operator/deployment.yaml | kubectl delete --ignore-not-found -f - -kubectl delete -f config/rbac/ --ignore-not-found -kubectl delete namespace hive-operator --ignore-not-found -kubectl delete secret hive-db-secret --ignore-not-found -kubectl delete crd hiveclusters.hive.apache.org --ignore-not-found -helm uninstall postgres --ignore-not-found 2>/dev/null || true -kubectl delete pvc data-postgres-postgresql-0 --ignore-not-found -helm uninstall zookeeper --ignore-not-found 2>/dev/null || true -kubectl delete pvc data-zookeeper-0 --ignore-not-found -helm uninstall ozone --ignore-not-found 2>/dev/null || true +--- + +## Architecture + +``` +HiveCluster CR + | + v +HiveClusterReconciler + | + +-- HadoopConfigMapDependent (core-site.xml) + +-- 
MetastoreConfigMapDependent (metastore-site.xml) + +-- HiveServer2ConfigMapDependent (hive-site.xml + tez-site.xml) + +-- SchemaInitJobDependent (schematool -initOrUpgradeSchema) + +-- MetastoreDeploymentDependent --> MetastoreServiceDependent + +-- HiveServer2DeploymentDependent --> HiveServer2ServiceDependent + +-- LlapStatefulSetDependent --> LlapServiceDependent (optional) + +-- ScratchPvcDependent (shared scratch PVC, optional) + +-- TezAmStatefulSetDependent --> TezAmServiceDependent (optional) ``` + +**Startup order:** +1. ConfigMaps (Hadoop, Metastore, HiveServer2) +2. Schema Init Job [if Metastore enabled] +3. Metastore Deployment + Service [if enabled] +4. HiveServer2 Deployment + Service +5. LLAP + TezAM [if enabled] diff --git a/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml b/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml index aaf3fbebcec8..79612607f1c9 100644 --- a/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml +++ b/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml @@ -17,7 +17,7 @@ apiVersion: hive.apache.org/v1alpha1 kind: HiveCluster metadata: - name: my-hive + name: hive spec: image: apache/hive:${HIVE_VERSION} imagePullPolicy: IfNotPresent diff --git a/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml b/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml index 617cc73836ff..e42b10396dd5 100644 --- a/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml +++ b/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml @@ -17,7 +17,7 @@ apiVersion: hive.apache.org/v1alpha1 kind: HiveCluster metadata: - name: my-hive + name: hive spec: image: apache/hive:${HIVE_VERSION} imagePullPolicy: IfNotPresent diff --git a/packaging/src/kubernetes/helm/hive-operator/Chart.yaml b/packaging/src/kubernetes/helm/hive-operator/Chart.yaml new file mode 100644 index 000000000000..b1e8104b155e --- /dev/null +++ 
b/packaging/src/kubernetes/helm/hive-operator/Chart.yaml @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v2 +name: hive-operator +description: Apache Hive Kubernetes Operator - deploys and manages Hive clusters on Kubernetes +type: application +version: "4.3.0-SNAPSHOT" +appVersion: "4.3.0-SNAPSHOT" +kubeVersion: ">=1.25.0" +keywords: + - hive + - hadoop + - sql + - data-warehouse + - kubernetes-operator +home: https://hive.apache.org/ +sources: + - https://github.com/apache/hive +maintainers: + - name: Apache Hive + url: https://hive.apache.org/ diff --git a/packaging/src/kubernetes/src/gen/hiveclusters.hive.apache.org-v1.yml b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml similarity index 100% rename from packaging/src/kubernetes/src/gen/hiveclusters.hive.apache.org-v1.yml rename to packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/_helpers.tpl b/packaging/src/kubernetes/helm/hive-operator/templates/_helpers.tpl new file mode 100644 index 000000000000..ab398ff9fea1 --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/templates/_helpers.tpl @@ -0,0 +1,52 @@ 
+{{/* +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{/* Chart name */}} +{{- define "hive-operator.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* Fullname: release-name + chart-name */}} +{{- define "hive-operator.fullname" -}} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* Operator labels */}} +{{- define "hive-operator.labels" -}} +app.kubernetes.io/name: hive-kubernetes-operator +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +helm.sh/chart: {{ printf "%s-%s" .Chart.Name .Chart.Version }} +{{- end }} + +{{/* Operator selector labels */}} +{{- define "hive-operator.selectorLabels" -}} +app.kubernetes.io/name: hive-kubernetes-operator +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* ServiceAccount name */}} +{{- define "hive-operator.serviceAccountName" -}} +{{- printf "%s" (include "hive-operator.fullname" .) 
}} +{{- end }} + +{{/* HiveCluster CR name */}} +{{- define "hive-operator.clusterName" -}} +{{- .Values.cluster.name | default .Release.Name }} +{{- end }} diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml new file mode 100644 index 000000000000..d27e1fea8c6f --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml @@ -0,0 +1,52 @@ +{{/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "hive-operator.fullname" . }} + labels: + {{- include "hive-operator.labels" . 
| nindent 4 }} +rules: + # HiveCluster CRD management + - apiGroups: ["hive.apache.org"] + resources: ["hiveclusters", "hiveclusters/status", "hiveclusters/finalizers"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Deployments and StatefulSets + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Jobs for schema initialization + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Services, ConfigMaps, and PersistentVolumeClaims + - apiGroups: [""] + resources: ["services", "configmaps", "persistentvolumeclaims"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Secrets: read-only for DB password references + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list", "watch"] + # Events for status reporting + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] + # Pods: read-only for readiness checking + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/clusterrolebinding.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrolebinding.yaml new file mode 100644 index 000000000000..10ef23316ab5 --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrolebinding.yaml @@ -0,0 +1,31 @@ +{{/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "hive-operator.fullname" . }} + labels: + {{- include "hive-operator.labels" . | nindent 4 }} +subjects: + - kind: ServiceAccount + name: {{ include "hive-operator.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "hive-operator.fullname" . }} diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/deployment.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/deployment.yaml new file mode 100644 index 000000000000..1c57badfeec0 --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/templates/deployment.yaml @@ -0,0 +1,46 @@ +{{/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "hive-operator.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "hive-operator.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "hive-operator.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "hive-operator.selectorLabels" . | nindent 8 }} + spec: + serviceAccountName: {{ include "hive-operator.serviceAccountName" . }} + containers: + - name: operator + image: "{{ .Values.operator.image.repository }}:{{ .Values.operator.image.tag }}" + imagePullPolicy: {{ .Values.operator.image.pullPolicy }} + env: + - name: OPERATOR_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + resources: + {{- toYaml .Values.operator.resources | nindent 12 }} diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml new file mode 100644 index 000000000000..504d6f20dc29 --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml @@ -0,0 +1,181 @@ +{{/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/}} +{{- if .Values.cluster.enabled }} +apiVersion: hive.apache.org/v1alpha1 +kind: HiveCluster +metadata: + name: {{ include "hive-operator.clusterName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "hive-operator.labels" . | nindent 4 }} +spec: + image: {{ .Values.cluster.image }} + imagePullPolicy: {{ .Values.cluster.imagePullPolicy }} + + metastore: + enabled: {{ .Values.cluster.metastore.enabled }} + {{- if .Values.cluster.metastore.enabled }} + replicas: {{ .Values.cluster.metastore.replicas }} + warehouseDir: {{ .Values.cluster.metastore.warehouseDir | quote }} + database: + type: {{ .Values.cluster.database.type | quote }} + {{- if .Values.cluster.database.url }} + url: {{ .Values.cluster.database.url | quote }} + {{- end }} + {{- if .Values.cluster.database.driver }} + driver: {{ .Values.cluster.database.driver | quote }} + {{- end }} + {{- if .Values.cluster.database.username }} + username: {{ .Values.cluster.database.username | quote }} + {{- end }} + {{- if and .Values.cluster.database.passwordSecretRef .Values.cluster.database.passwordSecretRef.name }} + passwordSecretRef: + name: {{ .Values.cluster.database.passwordSecretRef.name | quote }} + key: {{ .Values.cluster.database.passwordSecretRef.key | default "password" | quote }} + {{- end }} + {{- if .Values.cluster.database.driverJarUrl }} + driverJarUrl: {{ .Values.cluster.database.driverJarUrl | quote }} + {{- end }} + {{- if .Values.cluster.metastore.resources }} + resources: + {{- toYaml .Values.cluster.metastore.resources | nindent 6 }} + {{- end }} + {{- if .Values.cluster.metastore.configOverrides }} + configOverrides: + {{- toYaml .Values.cluster.metastore.configOverrides | nindent 6 }} + {{- end }} + {{- if .Values.cluster.metastore.extraVolumes }} + extraVolumes: + {{- toYaml .Values.cluster.metastore.extraVolumes | nindent 6 }} + {{- end }} + {{- if .Values.cluster.metastore.extraVolumeMounts }} + extraVolumeMounts: + {{- toYaml 
.Values.cluster.metastore.extraVolumeMounts | nindent 6 }} + {{- end }} + {{- else }} + {{- if .Values.cluster.metastore.externalUri }} + externalUri: {{ .Values.cluster.metastore.externalUri | quote }} + {{- end }} + {{- end }} + + hiveServer2: + replicas: {{ .Values.cluster.hiveServer2.replicas }} + serviceType: {{ .Values.cluster.hiveServer2.serviceType | quote }} + thriftPort: {{ .Values.cluster.hiveServer2.thriftPort }} + webUiPort: {{ .Values.cluster.hiveServer2.webUiPort }} + {{- if .Values.cluster.hiveServer2.resources }} + resources: + {{- toYaml .Values.cluster.hiveServer2.resources | nindent 6 }} + {{- end }} + {{- if .Values.cluster.hiveServer2.configOverrides }} + configOverrides: + {{- toYaml .Values.cluster.hiveServer2.configOverrides | nindent 6 }} + {{- end }} + {{- if .Values.cluster.hiveServer2.externalJars }} + externalJars: + {{- toYaml .Values.cluster.hiveServer2.externalJars | nindent 6 }} + {{- end }} + {{- if .Values.cluster.hiveServer2.extraVolumes }} + extraVolumes: + {{- toYaml .Values.cluster.hiveServer2.extraVolumes | nindent 6 }} + {{- end }} + {{- if .Values.cluster.hiveServer2.extraVolumeMounts }} + extraVolumeMounts: + {{- toYaml .Values.cluster.hiveServer2.extraVolumeMounts | nindent 6 }} + {{- end }} + + llap: + enabled: {{ .Values.cluster.llap.enabled }} + {{- if .Values.cluster.llap.enabled }} + replicas: {{ .Values.cluster.llap.replicas }} + executors: {{ .Values.cluster.llap.executors }} + memoryMb: {{ .Values.cluster.llap.memoryMb }} + serviceHosts: {{ .Values.cluster.llap.serviceHosts | quote }} + {{- if .Values.cluster.llap.resources }} + resources: + {{- toYaml .Values.cluster.llap.resources | nindent 6 }} + {{- end }} + {{- if .Values.cluster.llap.configOverrides }} + configOverrides: + {{- toYaml .Values.cluster.llap.configOverrides | nindent 6 }} + {{- end }} + {{- if .Values.cluster.llap.extraVolumes }} + extraVolumes: + {{- toYaml .Values.cluster.llap.extraVolumes | nindent 6 }} + {{- end }} + {{- if 
.Values.cluster.llap.extraVolumeMounts }} + extraVolumeMounts: + {{- toYaml .Values.cluster.llap.extraVolumeMounts | nindent 6 }} + {{- end }} + {{- end }} + + tezAm: + enabled: {{ .Values.cluster.tezAm.enabled }} + {{- if .Values.cluster.tezAm.enabled }} + replicas: {{ .Values.cluster.tezAm.replicas }} + scratchStorageSize: {{ .Values.cluster.tezAm.scratchStorageSize | quote }} + {{- if .Values.cluster.tezAm.scratchStorageClassName }} + scratchStorageClassName: {{ .Values.cluster.tezAm.scratchStorageClassName | quote }} + {{- end }} + {{- if .Values.cluster.tezAm.resources }} + resources: + {{- toYaml .Values.cluster.tezAm.resources | nindent 6 }} + {{- end }} + {{- if .Values.cluster.tezAm.configOverrides }} + configOverrides: + {{- toYaml .Values.cluster.tezAm.configOverrides | nindent 6 }} + {{- end }} + {{- if .Values.cluster.tezAm.extraVolumes }} + extraVolumes: + {{- toYaml .Values.cluster.tezAm.extraVolumes | nindent 6 }} + {{- end }} + {{- if .Values.cluster.tezAm.extraVolumeMounts }} + extraVolumeMounts: + {{- toYaml .Values.cluster.tezAm.extraVolumeMounts | nindent 6 }} + {{- end }} + {{- end }} + + zookeeper: + quorum: {{ .Values.cluster.zookeeper.quorum | quote }} + + {{- if .Values.cluster.storage.coreSiteOverrides }} + hadoop: + coreSiteOverrides: + {{- toYaml .Values.cluster.storage.coreSiteOverrides | nindent 6 }} + {{- end }} + + {{- if .Values.cluster.storage.envVars }} + envVars: + {{- toYaml .Values.cluster.storage.envVars | nindent 4 }} + {{- end }} + + {{- if .Values.cluster.storage.externalJars }} + externalJars: + {{- toYaml .Values.cluster.storage.externalJars | nindent 4 }} + {{- end }} + + {{- if .Values.cluster.storage.volumes }} + volumes: + {{- toYaml .Values.cluster.storage.volumes | nindent 4 }} + {{- end }} + + {{- if .Values.cluster.storage.volumeMounts }} + volumeMounts: + {{- toYaml .Values.cluster.storage.volumeMounts | nindent 4 }} + {{- end }} +{{- end }} diff --git 
a/packaging/src/kubernetes/helm/hive-operator/templates/serviceaccount.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/serviceaccount.yaml new file mode 100644 index 000000000000..39085bf90d3b --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/templates/serviceaccount.yaml @@ -0,0 +1,24 @@ +{{/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "hive-operator.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "hive-operator.labels" . | nindent 4 }} diff --git a/packaging/src/kubernetes/helm/hive-operator/values.yaml b/packaging/src/kubernetes/helm/hive-operator/values.yaml new file mode 100644 index 000000000000..093e104cf746 --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/values.yaml @@ -0,0 +1,158 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ============================================================================= +# Hive Kubernetes Operator Helm Values +# +# QUICK START: Only set values under "cluster.database", "cluster.zookeeper", +# and "cluster.storage" — the chart defaults to a Full-HA cluster with +# Metastore, HiveServer2, LLAP, and Tez AM all enabled. +# ============================================================================= + +# -- Operator Deployment Configuration ---------------------------------------- + +operator: + image: + repository: apache/hive + tag: "operator-4.3.0-SNAPSHOT" + pullPolicy: IfNotPresent + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + memory: 512Mi + +# -- HiveCluster Instance Configuration --------------------------------------- + +# Set to false to install only the operator (no HiveCluster CR created). +# Useful if you want to manage HiveCluster CRs yourself. +cluster: + enabled: true + name: hive + + # Hive component image (not the operator image) + image: "apache/hive:4.3.0-SNAPSHOT" + imagePullPolicy: IfNotPresent + + # --------------------------------------------------------------------------- + # DATABASE (Required) — RDBMS for the Hive Metastore backend + # --------------------------------------------------------------------------- + database: + type: postgres + url: "" # e.g. "jdbc:postgresql://postgres:5432/metastore" + driver: "" # e.g. "org.postgresql.Driver" + username: "" # e.g. "hive" + passwordSecretRef: + name: "" # e.g. "hive-db-secret" + key: "" # e.g. "password" + driverJarUrl: "" # e.g. 
"https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + + # --------------------------------------------------------------------------- + # ZOOKEEPER (Required) — for Tez session registry and LLAP + # --------------------------------------------------------------------------- + zookeeper: + quorum: "" # e.g. "zookeeper:2181" + + # --------------------------------------------------------------------------- + # STORAGE (Required) — Hadoop filesystem configuration + # --------------------------------------------------------------------------- + storage: + # core-site.xml properties (filesystem endpoint, auth settings, etc.) + coreSiteOverrides: {} + # fs.defaultFS: "s3a://hive" + # fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + # fs.s3a.path.style.access: "true" + + # Environment variables injected into all pods (credentials, HADOOP_OPTIONAL_TOOLS) + envVars: [] + # - name: HADOOP_OPTIONAL_TOOLS + # value: "hadoop-aws" + # - name: AWS_ACCESS_KEY_ID + # value: "ozone" + # - name: AWS_SECRET_ACCESS_KEY + # value: "ozone" + + # URLs of connector JARs downloaded at pod startup (GCS, ABFS connectors) + externalJars: [] + # - "https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.25/gcs-connector-hadoop3-2.2.25-shaded.jar" + + # Volumes added to all pods (for mounting credential files) + volumes: [] + # - name: gcs-key + # secret: + # secretName: gcs-creds + + # Volume mounts added to all containers + volumeMounts: [] + # - name: gcs-key + # mountPath: /etc/gcs + # readOnly: true + + # --------------------------------------------------------------------------- + # METASTORE — defaults to enabled, 2 replicas (HA) + # --------------------------------------------------------------------------- + metastore: + enabled: true + replicas: 2 + warehouseDir: "/hive/warehouse" + resources: {} + configOverrides: {} + extraVolumes: [] + extraVolumeMounts: [] + # Set to use an external Metastore instead of deploying one: + 
# enabled: false + # externalUri: "thrift://external-metastore:9083" + + # --------------------------------------------------------------------------- + # HIVESERVER2 — defaults to 2 replicas (HA) + # --------------------------------------------------------------------------- + hiveServer2: + replicas: 2 + serviceType: ClusterIP + thriftPort: 10000 + webUiPort: 10002 + resources: {} + configOverrides: {} + externalJars: [] + extraVolumes: [] + extraVolumeMounts: [] + + # --------------------------------------------------------------------------- + # LLAP — enabled by default for full-HA + # --------------------------------------------------------------------------- + llap: + enabled: true + replicas: 2 + executors: 1 + memoryMb: 1024 + serviceHosts: "@llap0" + resources: {} + configOverrides: {} + extraVolumes: [] + extraVolumeMounts: [] + + # --------------------------------------------------------------------------- + # TEZ AM — enabled by default for full-HA + # --------------------------------------------------------------------------- + tezAm: + enabled: true + replicas: 2 + scratchStorageSize: "1Gi" + scratchStorageClassName: "" + resources: {} + configOverrides: {} + extraVolumes: [] + extraVolumeMounts: [] diff --git a/packaging/src/kubernetes/pom.xml b/packaging/src/kubernetes/pom.xml index b45b1b76968a..c41f18d4180c 100644 --- a/packaging/src/kubernetes/pom.xml +++ b/packaging/src/kubernetes/pom.xml @@ -110,24 +110,37 @@ - + + + org.apache.rat + apache-rat-plugin + + + helm/hive-operator/crds/** + + + + org.apache.maven.plugins maven-antrun-plugin - copy-crd-to-src-gen + copy-crd-to-helm-crds compile run - + + + + diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java index 851fb7bb7836..a4604bab3845 100644 --- 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java @@ -52,6 +52,7 @@ import io.fabric8.kubernetes.api.model.VolumeBuilder; import io.fabric8.kubernetes.api.model.VolumeMount; import io.fabric8.kubernetes.api.model.VolumeMountBuilder; +import io.fabric8.kubernetes.client.KubernetesClientException; import io.javaoperatorsdk.operator.api.reconciler.Context; import io.javaoperatorsdk.operator.processing.dependent.Matcher; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; @@ -100,6 +101,27 @@ protected HiveDependentResource(Class<R> resourceType) { super(resourceType); } + /** + * Catches 409 AlreadyExists during resource creation caused by + * informer lag — the resource exists on the API server but + * the informer cache hasn't indexed it yet, so JOSDK bypasses + * {@link #match} and calls create directly. + */ + @Override + protected R handleCreate(R desired, P primary, Context<P>
context) { + try { + return super.handleCreate(desired, primary, context); + } catch (KubernetesClientException e) { + if (e.getCode() == 409) { + LOG.info("Resource {} already exists (informer lag), " + + "will reconcile on next event", + desired.getMetadata().getName()); + return desired; + } + throw e; + } + } + /** * Disable Server-Side Apply. SSA on Docker Desktop Kubernetes causes * dual ReplicaSet creation (two SSA applies within the same second From e25466c41f6ae73f43581bc752005c70ff0aeeea Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Thu, 21 May 2026 12:35:24 +0530 Subject: [PATCH 3/3] Upgrade & Refactor --- packaging/src/kubernetes/README.md | 356 +----------------- .../config/samples/hivecluster-full-ha.yaml | 5 +- .../config/samples/hivecluster-minimal.yaml | 2 +- .../crds/hiveclusters.hive.apache.org-v1.yml | 40 +- .../hive-operator/templates/hivecluster.yaml | 4 +- .../kubernetes/helm/hive-operator/values.yaml | 2 - packaging/src/kubernetes/pom.xml | 28 +- .../kubernetes/operator/HiveOperatorMain.java | 12 +- .../dependent/HadoopConfigMapDependent.java | 5 +- .../dependent/HiveDependentResource.java | 224 +++-------- .../HiveServer2ConfigMapDependent.java | 5 +- .../HiveServer2DeploymentDependent.java | 51 ++- .../HiveServer2ServiceDependent.java | 20 +- .../dependent/LlapConfigMapDependent.java | 5 +- .../dependent/LlapServiceDependent.java | 5 +- .../dependent/LlapStatefulSetDependent.java | 8 +- .../MetastoreConfigMapDependent.java | 5 +- .../MetastoreDeploymentDependent.java | 20 +- .../dependent/MetastoreServiceDependent.java | 15 +- .../dependent/SchemaInitJobDependent.java | 5 +- .../dependent/ScratchPvcDependent.java | 5 +- .../dependent/TezAmServiceDependent.java | 5 +- .../dependent/TezAmStatefulSetDependent.java | 8 +- .../operator/model/HiveCluster.java | 2 +- .../operator/model/HiveClusterSpec.java | 21 +- .../operator/model/spec/DatabaseConfig.java | 6 +- .../operator/model/spec/HiveServer2Spec.java | 9 +- 
.../operator/model/spec/LlapSpec.java | 9 +- .../operator/model/spec/MetastoreSpec.java | 9 +- .../model/spec/ResourceRequirementsSpec.java | 11 +- .../operator/model/spec/TezAmSpec.java | 6 +- .../operator/model/spec/ZookeeperSpec.java | 11 +- .../reconciler/HiveClusterReconciler.java | 154 +++----- .../kubernetes/operator/util/ConfigUtils.java | 118 ++++++ .../operator/util/HiveConfigBuilder.java | 102 ++--- pom.xml | 4 +- 36 files changed, 504 insertions(+), 793 deletions(-) create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java diff --git a/packaging/src/kubernetes/README.md b/packaging/src/kubernetes/README.md index 210cfa238e5e..1fc11623240c 100644 --- a/packaging/src/kubernetes/README.md +++ b/packaging/src/kubernetes/README.md @@ -454,347 +454,6 @@ helm install hive ./helm/hive-operator -f values.yaml --- -### External Iceberg REST Catalog with Apache Polaris (AWS S3) - -[Apache Polaris](https://polaris.apache.org/) is an Iceberg REST catalog with -built-in OAuth2. Requires **real AWS S3** (Polaris uses STS credential vending). -See `packaging/src/docker/thirdparties/polaris/` for the Docker Compose equivalent. 
- -**Step 1: Create AWS secret and deploy Polaris** - -```bash -kubectl create secret generic aws-s3-creds \ - --from-literal=accessKey="" \ - --from-literal=secretKey="" - -kubectl run polaris --image=apache/polaris:latest --port=8181 \ - --overrides='{ - "spec": { - "containers": [{ - "name": "polaris", - "image": "apache/polaris:latest", - "ports": [{"containerPort": 8181}], - "env": [ - {"name": "POLARIS_BOOTSTRAP_CREDENTIALS", "value": "POLARIS,iceberg-client,iceberg-client-secret"}, - {"name": "POLARIS_REALM_CONTEXT_REALMS", "value": "POLARIS"}, - {"name": "QUARKUS_OTEL_SDK_DISABLED", "value": "true"}, - {"name": "POLARIS_READINESS_IGNORE_SEVERE_ISSUES", "value": "true"}, - {"name": "AWS_REGION", "value": "ap-south-1"}, - {"name": "AWS_ACCESS_KEY_ID", "valueFrom": {"secretKeyRef": {"name": "aws-s3-creds", "key": "accessKey"}}}, - {"name": "AWS_SECRET_ACCESS_KEY", "valueFrom": {"secretKeyRef": {"name": "aws-s3-creds", "key": "secretKey"}}} - ] - }] - } - }' -kubectl expose pod polaris --port=8181 --name=polaris -kubectl wait --for=condition=Ready pod/polaris --timeout=120s -``` - -**Step 2: Bootstrap Polaris catalog** - -```bash -kubectl run polaris-init --rm -it --restart=Never --image=alpine/curl -- sh -c ' - apk add --no-cache jq > /dev/null 2>&1 - - # Wait for Polaris - until curl -sf http://polaris:8181/api/catalog/v1/oauth/tokens \ - --user "iceberg-client:iceberg-client-secret" \ - -H "Polaris-Realm: POLARIS" \ - -d grant_type=client_credentials -d scope=PRINCIPAL_ROLE:ALL > /dev/null 2>&1; do - sleep 2 - done - - TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ - --user "iceberg-client:iceberg-client-secret" \ - -H "Polaris-Realm: POLARIS" \ - -d grant_type=client_credentials -d scope=PRINCIPAL_ROLE:ALL | jq -r .access_token) - echo "Token: ${TOKEN:0:20}..." 
- - # Create catalog with S3 storage - curl -s -H "Authorization: Bearer $TOKEN" -H "Polaris-Realm: POLARIS" \ - -H "Content-Type: application/json" \ - http://polaris:8181/api/management/v1/catalogs \ - -d "{\"catalog\":{\"name\":\"ice01\",\"type\":\"INTERNAL\",\"readOnly\":false, - \"properties\":{\"default-base-location\":\"s3://ayush-k8s-bucket /warehouse\"}, - \"storageConfigInfo\":{\"storageType\":\"S3\", - \"roleArn\":\"arn:aws:iam:::role/\", - \"allowedLocations\":[\"s3://ayush-k8s-bucket /\"]}}}" - echo "" - echo "Polaris bootstrap complete." -' -``` - -**Step 3: Install Hive** - -```bash -helm install hive ./helm/hive-operator \ - --set cluster.metastore.enabled=false \ - --set cluster.zookeeper.quorum="zookeeper:2181" \ - --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ - --set 'cluster.storage.envVars[0].value=hadoop-aws' \ - --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ - --set 'cluster.storage.envVars[1].valueFrom.secretKeyRef.name=aws-s3-creds' \ - --set 'cluster.storage.envVars[1].valueFrom.secretKeyRef.key=accessKey' \ - --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ - --set 'cluster.storage.envVars[2].valueFrom.secretKeyRef.name=aws-s3-creds' \ - --set 'cluster.storage.envVars[2].valueFrom.secretKeyRef.key=secretKey' \ - --set 'cluster.hiveServer2.configOverrides.hive\.metastore\.warehouse\.dir=s3a:///warehouse' \ - --set 'cluster.hiveServer2.configOverrides.metastore\.catalog\.default=ice01' \ - --set 'cluster.hiveServer2.configOverrides.metastore\.client\.impl=org.apache.iceberg.hive.client.HiveRESTCatalogClient' \ - --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.uri=http://polaris:8181/api/catalog' \ - --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.type=rest' \ - --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.warehouse=ice01' \ - --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.rest\.auth\.type=oauth2' \ - --set 
'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.oauth2-server-uri=http://polaris:8181/api/catalog/v1/oauth/tokens' \ - --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.credential=iceberg-client:iceberg-client-secret' \ - --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.scope=PRINCIPAL_ROLE:ALL' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.scheduled\.queries\.executor\.enabled=false' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.materializedview\.rebuild\.incremental=false' \ - --set 'cluster.hiveServer2.configOverrides.hive\.metastore\.transactional\.event\.listeners=' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.notification\.event\.poll\.interval=0' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.autogather=false' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.fetch\.column\.stats=false' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.estimate=false' -``` - -**Cleanup:** - -```bash -helm uninstall hive -kubectl delete pod polaris -kubectl delete svc polaris -kubectl delete secret aws-s3-creds -``` - ---- - -### External Iceberg REST Catalog with Apache Gravitino (Ozone Storage) - -[Apache Gravitino](https://gravitino.apache.org/) is an Iceberg REST catalog -that uses an external OAuth2 provider (Keycloak) for authentication. This -setup mirrors the working Docker Compose configuration in -`packaging/src/docker/thirdparties/gravitino/` but adapted for Kubernetes with -Ozone S3 storage. 
- -**Step 1: Deploy Keycloak with the Hive realm** - -```bash -# Create Keycloak realm config (defines iceberg-client with service account) -kubectl create configmap keycloak-realm --from-file=realm-export.json=<(cat <<'EOF' -{ - "realm": "hive", - "enabled": true, - "clients": [ - { - "clientId": "iceberg-client", - "secret": "iceberg-client-secret", - "enabled": true, - "redirectUris": ["*"], - "serviceAccountsEnabled": true, - "protocol": "openid-connect", - "publicClient": false, - "directAccessGrantsEnabled": false, - "standardFlowEnabled": false, - "defaultClientScopes": ["catalog"], - "optionalClientScopes": [], - "protocolMappers": [ - { - "name": "audience", - "protocol": "openid-connect", - "protocolMapper": "oidc-audience-mapper", - "consentRequired": false, - "config": { - "included.client.audience": "hive-iceberg", - "id.token.claim": "false", - "access.token.claim": "true" - } - } - ], - "attributes": { - "access.token.lifespan": "3600" - } - } - ], - "clientScopes": [ - { - "name": "catalog", - "protocol": "openid-connect", - "attributes": {}, - "protocolMappers": [] - } - ] -} -EOF -) - -# Deploy Keycloak with the realm import -kubectl run keycloak --image=quay.io/keycloak/keycloak:25.0.1 --port=8080 \ - --overrides='{ - "spec": { - "containers": [{ - "name": "keycloak", - "image": "quay.io/keycloak/keycloak:25.0.1", - "args": ["start-dev", "--import-realm", "--health-enabled=true"], - "ports": [{"containerPort": 8080}], - "env": [ - {"name": "KEYCLOAK_ADMIN", "value": "admin"}, - {"name": "KEYCLOAK_ADMIN_PASSWORD", "value": "admin"} - ], - "volumeMounts": [{"name": "realm", "mountPath": "/opt/keycloak/data/import"}] - }], - "volumes": [{"name": "realm", "configMap": {"name": "keycloak-realm"}}] - } - }' -kubectl expose pod keycloak --port=8080 --name=keycloak -kubectl wait --for=condition=Ready pod/keycloak --timeout=180s -``` - -**Step 2: Deploy Gravitino** - -```bash -# Create Gravitino config (matches Docker thirdparties/gravitino setup, with 
s3a warehouse) -kubectl create configmap gravitino-conf --from-file=gravitino-iceberg-rest-server.conf=<(cat <<'EOF' -gravitino.iceberg-rest.httpPort = 9001 -gravitino.iceberg-rest.catalog-backend = jdbc -gravitino.iceberg-rest.uri = jdbc:h2:file:/tmp/gravitino_h2_db;AUTO_SERVER=TRUE -gravitino.iceberg-rest.jdbc-driver = org.h2.Driver -gravitino.iceberg-rest.jdbc-user = sa -gravitino.iceberg-rest.jdbc-password = -gravitino.iceberg-rest.jdbc-initialize = true -gravitino.iceberg-rest.warehouse = s3a://hive/warehouse -gravitino.authenticators = oauth -gravitino.authenticator.oauth.serverUri = http://keycloak:8080/realms/hive -gravitino.authenticator.oauth.tokenPath = /protocol/openid-connect/token -gravitino.authenticator.oauth.scope = openid catalog -gravitino.authenticator.oauth.clientId = iceberg-client -gravitino.authenticator.oauth.clientSecret = iceberg-client-secret -gravitino.authenticator.oauth.tokenValidatorClass = org.apache.gravitino.server.authentication.JwksTokenValidator -gravitino.authenticator.oauth.jwksUri = http://keycloak:8080/realms/hive/protocol/openid-connect/certs -gravitino.authenticator.oauth.provider = default -gravitino.authenticator.oauth.principalFields = sub -gravitino.authenticator.oauth.allowSkewSecs = 60 -gravitino.authenticator.oauth.serviceAudience = hive-iceberg -EOF -) - -# Deploy Gravitino Iceberg REST server -kubectl run gravitino --image=apache/gravitino-iceberg-rest:1.0.0 --port=9001 \ - --overrides='{ - "spec": { - "containers": [{ - "name": "gravitino", - "image": "apache/gravitino-iceberg-rest:1.0.0", - "command": ["/bin/bash", "-c"], - "args": ["cp /tmp/gravitino-conf/gravitino-iceberg-rest-server.conf /root/gravitino-iceberg-rest-server/conf/gravitino-iceberg-rest-server.conf && mkdir -p /root/gravitino-iceberg-rest-server/libs && curl -sL -o /root/gravitino-iceberg-rest-server/libs/h2-2.2.220.jar https://repo1.maven.org/maven2/com/h2database/h2/2.2.220/h2-2.2.220.jar && /bin/bash 
/root/gravitino-iceberg-rest-server/bin/iceberg-rest-server.sh start && tail -f /dev/null"], - "ports": [{"containerPort": 9001}], - "volumeMounts": [{"name": "conf", "mountPath": "/tmp/gravitino-conf"}] - }], - "volumes": [{"name": "conf", "configMap": {"name": "gravitino-conf"}}] - } - }' -kubectl expose pod gravitino --port=9001 --name=gravitino -kubectl wait --for=condition=Ready pod/gravitino --timeout=120s -``` - -**Step 3: Install Hive with Gravitino as external catalog** - -**CLI:** - -```bash -helm install hive ./helm/hive-operator \ - --set cluster.metastore.enabled=false \ - --set cluster.zookeeper.quorum="zookeeper:2181" \ - --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive" \ - --set cluster.storage.coreSiteOverrides."fs\.s3a\.endpoint"="http://ozone-s3g-rest:9878" \ - --set-string cluster.storage.coreSiteOverrides."fs\.s3a\.path\.style\.access"=true \ - --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ - --set 'cluster.storage.envVars[0].value=hadoop-aws' \ - --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ - --set 'cluster.storage.envVars[1].value=ozone' \ - --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ - --set 'cluster.storage.envVars[2].value=ozone' \ - --set 'cluster.hiveServer2.configOverrides.metastore\.catalog\.default=ice01' \ - --set 'cluster.hiveServer2.configOverrides.metastore\.client\.impl=org.apache.iceberg.hive.client.HiveRESTCatalogClient' \ - --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.uri=http://gravitino:9001/iceberg' \ - --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.type=rest' \ - --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.rest\.auth\.type=oauth2' \ - --set 'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.oauth2-server-uri=http://keycloak:8080/realms/hive/protocol/openid-connect/token' \ - --set 
'cluster.hiveServer2.configOverrides.iceberg\.catalog\.ice01\.credential=iceberg-client:iceberg-client-secret' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.scheduled\.queries\.executor\.enabled=false' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.materializedview\.rebuild\.incremental=false' \ - --set 'cluster.hiveServer2.configOverrides.hive\.metastore\.transactional\.event\.listeners=' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.notification\.event\.poll\.interval=0' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.autogather=false' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.fetch\.column\.stats=false' \ - --set-string 'cluster.hiveServer2.configOverrides.hive\.stats\.estimate=false' -``` - -**Values file:** - -```yaml -# values-gravitino.yaml -cluster: - metastore: - enabled: false - - zookeeper: - quorum: "zookeeper:2181" - - storage: - coreSiteOverrides: - fs.defaultFS: "s3a://hive" - fs.s3a.endpoint: "http://ozone-s3g-rest:9878" - fs.s3a.path.style.access: "true" - envVars: - - name: HADOOP_OPTIONAL_TOOLS - value: "hadoop-aws" - - name: AWS_ACCESS_KEY_ID - value: "ozone" - - name: AWS_SECRET_ACCESS_KEY - value: "ozone" - - hiveServer2: - configOverrides: - # Iceberg REST catalog connection - metastore.catalog.default: "ice01" - metastore.client.impl: "org.apache.iceberg.hive.client.HiveRESTCatalogClient" - iceberg.catalog.ice01.uri: "http://gravitino:9001/iceberg" - iceberg.catalog.ice01.type: "rest" - iceberg.catalog.ice01.rest.auth.type: "oauth2" - iceberg.catalog.ice01.oauth2-server-uri: "http://keycloak:8080/realms/hive/protocol/openid-connect/token" - iceberg.catalog.ice01.credential: "iceberg-client:iceberg-client-secret" - # Disable HMS-dependent features (not available with REST catalog) - hive.scheduled.queries.executor.enabled: "false" - hive.materializedview.rebuild.incremental: "false" - hive.metastore.transactional.event.listeners: "" - 
hive.notification.event.poll.interval: "0" - hive.stats.autogather: "false" - hive.stats.fetch.column.stats: "false" - hive.stats.estimate: "false" -``` - -```bash -helm install hive ./helm/hive-operator -f values-gravitino.yaml -``` - -**Test the connection:** - -```bash -kubectl exec -it deployment/hive-hiveserver2 -- beeline -u "jdbc:hive2://localhost:10000/" \ - -e "CREATE TABLE test (id INT, name STRING); INSERT INTO test VALUES (1, 'hello'); SELECT * FROM test;" -``` - -**Cleanup:** - -```bash -helm uninstall hive -kubectl delete pod gravitino keycloak -kubectl delete svc gravitino keycloak -kubectl delete configmap keycloak-realm gravitino-conf -``` - ---- - ### Custom Replicas and Resources **Values file:** @@ -879,6 +538,7 @@ beeline -u "jdbc:hive2://localhost:10000/" | `cluster.enabled` | `true` | Create a HiveCluster CR (set `false` to install only the operator) | | `cluster.name` | `hive` | HiveCluster resource name | | `cluster.image` | `apache/hive:4.3.0-SNAPSHOT` | Hive component image | +| `cluster.imagePullPolicy` | `IfNotPresent` | Image pull policy: `Always`, `Never`, or `IfNotPresent` | ### Database (Required) @@ -918,6 +578,8 @@ beeline -u "jdbc:hive2://localhost:10000/" | `cluster.metastore.warehouseDir` | `/hive/warehouse` | Warehouse directory | | `cluster.metastore.resources` | `{}` | CPU/memory | | `cluster.metastore.configOverrides` | `{}` | Extra `metastore-site.xml` properties | +| `cluster.metastore.extraVolumes` | `[]` | Additional volumes for Metastore pods | +| `cluster.metastore.extraVolumeMounts` | `[]` | Additional volume mounts for Metastore containers | ### HiveServer2 @@ -925,11 +587,11 @@ beeline -u "jdbc:hive2://localhost:10000/" |-------|---------|-------------| | `cluster.hiveServer2.replicas` | `2` | Replica count | | `cluster.hiveServer2.serviceType` | `ClusterIP` | K8s Service type | -| `cluster.hiveServer2.thriftPort` | `10000` | Thrift port | -| `cluster.hiveServer2.webUiPort` | `10002` | Web UI port | | 
`cluster.hiveServer2.resources` | `{}` | CPU/memory | -| `cluster.hiveServer2.configOverrides` | `{}` | Extra `hive-site.xml` properties | +| `cluster.hiveServer2.configOverrides` | `{}` | Extra `hive-site.xml` properties (use `hive.server2.thrift.port` / `hive.server2.webui.port` to override ports) | | `cluster.hiveServer2.externalJars` | `[]` | HS2-specific JARs | +| `cluster.hiveServer2.extraVolumes` | `[]` | Additional volumes for HS2 pods | +| `cluster.hiveServer2.extraVolumeMounts` | `[]` | Additional volume mounts for HS2 containers | ### LLAP @@ -941,6 +603,9 @@ beeline -u "jdbc:hive2://localhost:10000/" | `cluster.llap.memoryMb` | `1024` | Memory per daemon (MB) | | `cluster.llap.serviceHosts` | `@llap0` | LLAP ZK identity | | `cluster.llap.resources` | `{}` | CPU/memory | +| `cluster.llap.configOverrides` | `{}` | Extra LLAP config properties | +| `cluster.llap.extraVolumes` | `[]` | Additional volumes for LLAP pods | +| `cluster.llap.extraVolumeMounts` | `[]` | Additional volume mounts for LLAP containers | ### Tez AM @@ -951,6 +616,9 @@ beeline -u "jdbc:hive2://localhost:10000/" | `cluster.tezAm.scratchStorageSize` | `1Gi` | Shared scratch PVC size | | `cluster.tezAm.scratchStorageClassName` | | StorageClass (must support RWX) | | `cluster.tezAm.resources` | `{}` | CPU/memory | +| `cluster.tezAm.configOverrides` | `{}` | Extra TezAM config properties | +| `cluster.tezAm.extraVolumes` | `[]` | Additional volumes for TezAM pods | +| `cluster.tezAm.extraVolumeMounts` | `[]` | Additional volume mounts for TezAM containers | --- diff --git a/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml b/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml index 79612607f1c9..cc65852d4f35 100644 --- a/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml +++ b/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml @@ -14,7 +14,7 @@ # limitations under the License. 
# Full HiveCluster (HA): All four services with LLAP, TezAM, and multiple replicas -apiVersion: hive.apache.org/v1alpha1 +apiVersion: hive.apache.org/v1 kind: HiveCluster metadata: name: hive @@ -34,9 +34,6 @@ spec: key: password driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" warehouseDir: "/hive/warehouse" - configOverrides: - metastore.catalog.servlet.port: "9001" - metastore.catalog.servlet.auth: "none" hiveServer2: replicas: 2 diff --git a/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml b/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml index e42b10396dd5..dfb44f04f741 100644 --- a/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml +++ b/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml @@ -14,7 +14,7 @@ # limitations under the License. # Minimal HiveCluster: Metastore + HiveServer2 with external S3-compatible storage -apiVersion: hive.apache.org/v1alpha1 +apiVersion: hive.apache.org/v1 kind: HiveCluster metadata: name: hive diff --git a/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml index 23ccb2c5112a..99768633a128 100644 --- a/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml +++ b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml @@ -13,7 +13,7 @@ spec: singular: hivecluster scope: Namespaced versions: - - name: v1alpha1 + - name: v1 schema: openAPIV3Schema: properties: @@ -115,6 +115,7 @@ spec: type: integer type: object replicas: + default: 1 description: Number of replicas type: integer resources: @@ -127,22 +128,19 @@ spec: description: "Memory limit (e.g. 2Gi, 1024Mi)" type: string requestsCpu: + default: 500m description: "CPU request (e.g. 500m, 1)" type: string requestsMemory: + default: 1Gi description: "Memory request (e.g. 
1Gi, 512Mi)" type: string type: object serviceType: + default: ClusterIP description: "Kubernetes Service type: ClusterIP, LoadBalancer,\ \ or NodePort" type: string - thriftPort: - description: HiveServer2 Thrift port - type: integer - webUiPort: - description: HiveServer2 Web UI port - type: integer type: object x-kubernetes-preserve-unknown-fields: true image: @@ -152,7 +150,7 @@ spec: description: "Image pull policy: Always, Never, or IfNotPresent" type: string llap: - description: LLAP daemon configuration. Disabled by default. + description: LLAP daemon configuration. Enabled by default. properties: configOverrides: additionalProperties: @@ -160,9 +158,11 @@ spec: description: Additional configuration overrides as key-value pairs type: object enabled: + default: true description: Whether LLAP is enabled type: boolean executors: + default: 1 description: Number of LLAP executors per daemon type: integer extraVolumeMounts: @@ -179,6 +179,7 @@ spec: type: array x-kubernetes-preserve-unknown-fields: true memoryMb: + default: 1024 description: Memory in MB per LLAP daemon instance type: integer readinessProbe: @@ -205,6 +206,7 @@ spec: type: integer type: object replicas: + default: 1 description: Number of replicas type: integer resources: @@ -217,9 +219,11 @@ spec: description: "Memory limit (e.g. 2Gi, 1024Mi)" type: string requestsCpu: + default: 500m description: "CPU request (e.g. 500m, 1)" type: string requestsMemory: + default: 1Gi description: "Memory request (e.g. 
1Gi, 512Mi)" type: string type: object @@ -258,6 +262,7 @@ spec: type: string type: object type: + default: derby description: "Database type: derby, mysql, postgres, mssql,\ \ or oracle" type: string @@ -269,6 +274,7 @@ spec: type: string type: object enabled: + default: true description: Whether the operator should deploy and manage a Metastore type: boolean externalUri: @@ -335,6 +341,7 @@ spec: type: integer type: object replicas: + default: 1 description: Number of replicas type: integer resources: @@ -347,19 +354,22 @@ spec: description: "Memory limit (e.g. 2Gi, 1024Mi)" type: string requestsCpu: + default: 500m description: "CPU request (e.g. 500m, 1)" type: string requestsMemory: + default: 1Gi description: "Memory request (e.g. 1Gi, 512Mi)" type: string type: object warehouseDir: + default: /hive/warehouse description: Warehouse directory path type: string type: object x-kubernetes-preserve-unknown-fields: true tezAm: - description: Tez Application Master configuration. Disabled by default. + description: Tez Application Master configuration. Enabled by default. properties: configOverrides: additionalProperties: @@ -367,6 +377,7 @@ spec: description: Additional configuration overrides as key-value pairs type: object enabled: + default: true description: Whether Tez AM is enabled type: boolean extraVolumeMounts: @@ -383,6 +394,7 @@ spec: type: array x-kubernetes-preserve-unknown-fields: true replicas: + default: 1 description: Number of replicas type: integer resources: @@ -395,9 +407,11 @@ spec: description: "Memory limit (e.g. 2Gi, 1024Mi)" type: string requestsCpu: + default: 500m description: "CPU request (e.g. 500m, 1)" type: string requestsMemory: + default: 1Gi description: "Memory request (e.g. 1Gi, 512Mi)" type: string type: object @@ -406,6 +420,7 @@ spec: \ ReadWriteMany access. If null, uses cluster default." 
type: string scratchStorageSize: + default: 1Gi description: Storage size for the shared scratch PVC (ReadWriteMany) mounted on HS2 and TezAM at /opt/hive/scratch type: string @@ -430,9 +445,14 @@ spec: this operator) properties: quorum: - description: ZooKeeper quorum connection string + description: ZooKeeper quorum connection string. This field is + strictly required. type: string + required: + - quorum type: object + required: + - zookeeper type: object x-kubernetes-preserve-unknown-fields: true status: diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml index 504d6f20dc29..091ecefb3cb0 100644 --- a/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml +++ b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml @@ -16,7 +16,7 @@ limitations under the License. */}} {{- if .Values.cluster.enabled }} -apiVersion: hive.apache.org/v1alpha1 +apiVersion: hive.apache.org/v1 kind: HiveCluster metadata: name: {{ include "hive-operator.clusterName" . 
}} @@ -76,8 +76,6 @@ spec: hiveServer2: replicas: {{ .Values.cluster.hiveServer2.replicas }} serviceType: {{ .Values.cluster.hiveServer2.serviceType | quote }} - thriftPort: {{ .Values.cluster.hiveServer2.thriftPort }} - webUiPort: {{ .Values.cluster.hiveServer2.webUiPort }} {{- if .Values.cluster.hiveServer2.resources }} resources: {{- toYaml .Values.cluster.hiveServer2.resources | nindent 6 }} diff --git a/packaging/src/kubernetes/helm/hive-operator/values.yaml b/packaging/src/kubernetes/helm/hive-operator/values.yaml index 093e104cf746..b7d75930c5b2 100644 --- a/packaging/src/kubernetes/helm/hive-operator/values.yaml +++ b/packaging/src/kubernetes/helm/hive-operator/values.yaml @@ -122,8 +122,6 @@ cluster: hiveServer2: replicas: 2 serviceType: ClusterIP - thriftPort: 10000 - webUiPort: 10002 resources: {} configOverrides: {} externalJars: [] diff --git a/packaging/src/kubernetes/pom.xml b/packaging/src/kubernetes/pom.xml index c41f18d4180c..342441730cc5 100644 --- a/packaging/src/kubernetes/pom.xml +++ b/packaging/src/kubernetes/pom.xml @@ -28,12 +28,36 @@ ../../.. 
- io.javaoperatorsdk operator-framework ${josdk.version} + + io.javaoperatorsdk + operator-framework-core + ${josdk.version} + + + io.fabric8 + kubernetes-client + ${fabric8.version} + + + io.fabric8 + kubernetes-httpclient-vertx + ${fabric8.version} + + + io.github.java-diff-utils + java-diff-utils + 4.17 + + + io.vertx + vertx-web-common + 4.5.27 + io.fabric8 @@ -41,7 +65,6 @@ ${fabric8.version} provided - org.apache.logging.log4j log4j-slf4j-impl @@ -75,7 +98,6 @@ - org.apache.maven.plugins maven-shade-plugin diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java index ce4cfa6052c2..55bd3372a40d 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java @@ -35,17 +35,7 @@ private HiveOperatorMain() { /** Starts the operator, registers reconcilers, and blocks until shutdown. */ public static void main(String[] args) { LOG.info("Starting Hive Kubernetes Operator"); - // Disable SSA-based matching for dependent resources. - // JOSDK's SSA matcher relies on Kubernetes managedFields entries - // to detect diffs. When managedFields is absent or incomplete - // (common with Docker Desktop and some K8s distributions), the - // matcher always returns "not matched", causing the operator to - // re-apply every dependent on every reconciliation and creating - // an infinite update loop. The classic GenericKubernetesResource - // matcher compares the desired spec directly and is immune to this. 
- Operator operator = new Operator(overrider -> overrider - .withSSABasedCreateUpdateMatchForDependentResources(false) - .withPreviousAnnotationForDependentResources(false)); + Operator operator = new Operator(); operator.register(new HiveClusterReconciler()); operator.start(); LOG.info("Hive Kubernetes Operator started successfully"); diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java index ede10d7e8036..6c0f9308dbc1 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java @@ -23,6 +23,7 @@ import io.fabric8.kubernetes.api.model.ConfigMap; import io.fabric8.kubernetes.api.model.ConfigMapBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; @@ -31,8 +32,8 @@ /** Manages the Hadoop core-site.xml ConfigMap for filesystem configuration. 
*/ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=hadoop-config," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=hadoop-config," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class HadoopConfigMapDependent extends HiveDependentResource { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java index a4604bab3845..cc2eb0de6de0 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java @@ -21,22 +21,10 @@ import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.util.ArrayList; -import java.util.Comparator; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; - -import java.util.Iterator; import java.util.List; -import java.util.Objects; +import java.util.Map; import java.util.Optional; -import java.util.TreeMap; -import java.util.concurrent.ConcurrentHashMap; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.SerializationFeature; -import com.fasterxml.jackson.databind.node.ArrayNode; -import com.fasterxml.jackson.databind.node.ObjectNode; +import io.fabric8.kubernetes.api.model.AffinityBuilder; import io.fabric8.kubernetes.api.model.Container; import io.fabric8.kubernetes.api.model.ContainerBuilder; import io.fabric8.kubernetes.api.model.EnvVar; @@ -70,7 +58,7 @@ *

* Overrides {@link #getSecondaryResource} to use this dependent's own * event source instead of the generic type-based lookup. This is - * required because JOSDK 4.9.x's default implementation calls + * required because JOSDK's default implementation calls * {@code context.getSecondaryResource(type)} which throws when * multiple dependents manage the same Kubernetes resource type * (e.g. multiple ConfigMap or Service dependents). @@ -81,21 +69,10 @@ public abstract class HiveDependentResource - LAST_DESIRED_HASHES = new ConcurrentHashMap<>(); protected HiveDependentResource(Class resourceType) { super(resourceType); @@ -104,8 +81,8 @@ protected HiveDependentResource(Class resourceType) { /** * Catches 409 AlreadyExists during resource creation caused by * informer lag — the resource exists on the API server but - * the informer cache hasn't indexed it yet, so JOSDK bypasses - * {@link #match} and calls create directly. + * the informer cache hasn't indexed it yet, so JOSDK calls + * create directly. */ @Override protected R handleCreate(R desired, P primary, Context<P> context) { @@ -122,17 +99,6 @@ protected R handleCreate(R desired, P primary, Context<P> context) { } } - /** - * Disable Server-Side Apply. SSA on Docker Desktop Kubernetes causes - * dual ReplicaSet creation (two SSA applies within the same second - * produce different pod template hashes). Standard create/update - * combined with our custom hash-based {@link #match} is sufficient. - */ - @Override - protected boolean useSSA(Context<P> context) { - return false; - } - @Override public Optional getSecondaryResource(P primary, Context<P> context) { @@ -141,145 +107,22 @@ public Optional getSecondaryResource(P primary, } /** - * Custom match that compares an SHA-256 hash of the desired resource - * spec against the last applied hash. Overrides the 3-arg entry - * point because that is what JOSDK's reconcile loop actually calls. - *

- * The parent's 3-arg match delegates to a 5-arg method that calls - * {@code addMetadata()} unconditionally — writing the - * {@code javaoperatorsdk.io/previous} annotation on every - * reconciliation. On Docker Desktop, that annotation write bumps - * {@code metadata.generation}, which triggers a new informer event, - * causing an infinite reconciliation loop. - *

- * By intercepting here we avoid both the annotation write and the - * false-positive diffs from K8s-injected defaults (protocol: TCP, - * terminationGracePeriodSeconds, etc.) when the desired spec has - * not actually changed. + * Jobs and PVCs are largely immutable after creation — Kubernetes rejects + * PUTs that modify spec.selector or spec.template (Job) or spec.accessModes + * (PVC; its storage request may only be expanded). Short-circuit the match to + * prevent the framework from attempting updates on these resources. */ @Override - public Matcher.Result match(R actual, P primary, - Context

context) { - R desired = desired(primary, context); - String resourceKey = desired.getKind() - + "/" + desired.getMetadata().getNamespace() - + "/" + desired.getMetadata().getName(); - String desiredHash = computeHash(desired); - if (actual == null) { - if (desiredHash != null) { - String previousHash = LAST_DESIRED_HASHES.get(resourceKey); - if (Objects.equals(previousHash, desiredHash)) { - // Resource was created in a previous reconciliation but - // the informer hasn't indexed it yet. Returning false - // would trigger another SSA apply, which fires another - // informer event, creating an infinite reconciliation - // loop on Docker Desktop. Skip the re-creation. - LOG.debug("Resource {} already created (informer lag), " - + "skipping re-create", resourceKey); - return Matcher.Result.computed(true, desired); - } - // First creation — cache the hash so the next - // reconciliation can detect informer lag. - LOG.info("Creating resource {}", resourceKey); - LAST_DESIRED_HASHES.put(resourceKey, desiredHash); - } - return Matcher.Result.computed(false, desired); - } - if (desiredHash == null) { - // Serialization failed — delegate to parent which will - // call addMetadata + the real matcher - return super.match(actual, primary, context); - } - // Jobs and PVCs are immutable after creation — never update. - String kind = actual.getKind(); - if ("Job".equals(kind) || "PersistentVolumeClaim".equals(kind)) { - LAST_DESIRED_HASHES.put(resourceKey, desiredHash); - return Matcher.Result.computed(true, desired); - } - String previousHash = LAST_DESIRED_HASHES.get(resourceKey); - if (previousHash == null) { - // First reconciliation after operator start — the resource - // already exists so seed the cache without triggering an - // update. This prevents a gratuitous rolling update caused - // by K8s default-value injection (protocol: TCP, etc.). 
- LOG.info("Seeding hash for existing resource {}, skipping update", - resourceKey); - LAST_DESIRED_HASHES.put(resourceKey, desiredHash); - return Matcher.Result.computed(true, desired); - } - if (desiredHash.equals(previousHash)) { - LOG.debug("Desired spec unchanged for {}, skipping update", - resourceKey); - return Matcher.Result.computed(true, desired); - } - LOG.info("Desired spec changed for {}, will update", resourceKey); - LAST_DESIRED_HASHES.put(resourceKey, desiredHash); - return Matcher.Result.computed(false, desired); - } - - private String computeHash(R resource) { - try { - JsonNode tree = MAPPER.valueToTree(resource); - sortJsonNode(tree); - String json = MAPPER.writeValueAsString(tree); - MessageDigest digest = MessageDigest.getInstance("SHA-256"); - byte[] hash = digest.digest( - json.getBytes(StandardCharsets.UTF_8)); - StringBuilder sb = new StringBuilder(64); - for (byte b : hash) { - sb.append(String.format("%02x", b)); - } - return sb.toString(); - } catch (Exception e) { - LOG.warn("Failed to compute hash for resource {}: {}", - resource.getMetadata().getName(), e.getMessage()); - return null; - } - } - - /** Recursively sort all object node keys for deterministic JSON. */ - private static void sortJsonNode(JsonNode node) { - if (node.isObject()) { - ObjectNode obj = (ObjectNode) node; - TreeMap sorted = new TreeMap<>(); - Iterator fieldNames = obj.fieldNames(); - while (fieldNames.hasNext()) { - String name = fieldNames.next(); - JsonNode child = obj.get(name); - sortJsonNode(child); - sorted.put(name, child); - } - obj.removeAll(); - sorted.forEach(obj::set); - } else if (node.isArray()) { - ArrayNode arr = (ArrayNode) node; - for (int i = 0; i < arr.size(); i++) { - sortJsonNode(arr.get(i)); + public Matcher.Result match(R actualResource, R desired, + P primary, Context<P> context) { + if (actualResource != null) { + String kind = actualResource.getKind(); + if ("Job".equals(kind) + || "PersistentVolumeClaim".equals(kind)) { + return Matcher.Result.nonComputed(true); } - sortArrayNode(arr); } - } - - /** - * Sort array elements by a stable key to make hashing order-independent. - * Uses "name" field if present (env vars, volumes, containers, ports), - * falls back to "mountPath" (volume mounts), then serialized form. - */ - private static void sortArrayNode(ArrayNode arr) { - if (arr.size() <= 1 || !arr.get(0).isObject()) { - return; - } - - List sortedElements = StreamSupport.stream(arr.spliterator(), false) - .sorted(Comparator.comparing(node -> - node.has("name") ? node.get("name").asText() : - node.has("mountPath") ? node.get("mountPath").asText() : - node.toString() - )) - .collect(Collectors.toList()); - - arr.removeAll(); - sortedElements.forEach(arr::add); + return super.match(actualResource, desired, primary, context); } /** @@ -417,6 +260,32 @@ protected static ResourceRequirements buildResources(ResourceRequirementsSpec sp return builder.build(); } + /** + * Sets a preferred pod anti-affinity on the pod spec if no affinity is + * already defined. This spreads replicas across nodes while allowing + * future user-defined affinity to take precedence.
+ */ + protected static void applySpreadAffinityIfAbsent( + io.fabric8.kubernetes.api.model.PodSpec podSpec, + Map selectorLabels) { + if (podSpec.getAffinity() != null) { + return; + } + podSpec.setAffinity(new AffinityBuilder() + .withNewPodAntiAffinity() + .addNewPreferredDuringSchedulingIgnoredDuringExecution() + .withWeight(100) + .withNewPodAffinityTerm() + .withNewLabelSelector() + .withMatchLabels(selectorLabels) + .endLabelSelector() + .withTopologyKey("kubernetes.io/hostname") + .endPodAffinityTerm() + .endPreferredDuringSchedulingIgnoredDuringExecution() + .endPodAntiAffinity() + .build()); + } + /** * Builds an init container that downloads external JARs via wget * (for http/https URLs) or hadoop fs (for HDFS/cloud paths). @@ -434,9 +303,12 @@ protected static Container buildExternalJarsInitContainer( for (String jarUrl : externalJars) { if (jarUrl.startsWith("http://") || jarUrl.startsWith("https://")) { - cmd.append("wget -q -P ").append(targetDir).append(" '").append(jarUrl).append("' && "); + cmd.append("wget -q --tries=3 --waitretry=5 -P ").append(targetDir) + .append(" '").append(jarUrl).append("' && "); } else { - cmd.append("hadoop fs -copyToLocal '").append(jarUrl).append("' ").append(targetDir).append("/ && "); + cmd.append("{ ok=0; for i in 1 2 3; do hadoop fs -copyToLocal '").append(jarUrl) + .append("' ").append(targetDir).append("/ && ok=1 && break || sleep 5; done; ") + .append("[ $ok -eq 1 ]; } && "); } } cmd.append("echo 'All external JARs downloaded successfully.'"); diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java index 088cfbe2fa2a..9bb0597cc960 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java +++ 
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java @@ -23,6 +23,7 @@ import io.fabric8.kubernetes.api.model.ConfigMap; import io.fabric8.kubernetes.api.model.ConfigMapBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; @@ -32,8 +33,8 @@ /** Manages the hive-site.xml ConfigMap for HiveServer2. */ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=hiveserver2," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class HiveServer2ConfigMapDependent extends HiveDependentResource { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java index 29e3db3f31f0..ccb3048dea98 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java @@ -30,18 +30,20 @@ import io.fabric8.kubernetes.api.model.apps.Deployment; import io.fabric8.kubernetes.api.model.apps.DeploymentBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import 
org.apache.hive.kubernetes.operator.model.HiveClusterSpec; import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; import org.apache.hive.kubernetes.operator.util.Labels; /** Manages the Kubernetes Deployment for HiveServer2. */ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=hiveserver2," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class HiveServer2DeploymentDependent extends HiveDependentResource { @@ -88,36 +90,56 @@ protected Deployment desired(HiveCluster hiveCluster, spec.llap().serviceHosts(), null)); } + int metastorePort = ConfigUtils.getInt( + spec.metastore().configOverrides(), + ConfigUtils.METASTORE_THRIFT_PORT_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_HIVE_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_DEFAULT); String metastoreUri = spec.metastore().isEnabled() ? 
- "thrift://" + hiveCluster.getMetadata().getName() + "-metastore:9083" : + "thrift://" + hiveCluster.getMetadata().getName() + + "-metastore:" + metastorePort : spec.metastore().externalUri(); StringBuilder serviceOpts = new StringBuilder(); if (metastoreUri != null && !metastoreUri.isEmpty()) { - serviceOpts.append("-Dhive.metastore.uris=").append(metastoreUri); + serviceOpts.append("-D") + .append(ConfigUtils.HIVE_METASTORE_URIS_KEY) + .append("=").append(metastoreUri); } if (spec.llap().isEnabled()) { - serviceOpts.append(" -Dhive.execution.mode=llap"); - serviceOpts.append(" -Dhive.llap.daemon.service.hosts=") - .append(spec.llap().serviceHosts()); + serviceOpts.append(" -D") + .append(ConfigUtils.HIVE_EXECUTION_MODE_KEY) + .append("=llap"); + serviceOpts.append(" -D") + .append(ConfigUtils.HIVE_LLAP_DAEMON_SERVICE_HOSTS_KEY) + .append("=").append(spec.llap().serviceHosts()); } if (spec.tezAm().isEnabled()) { - serviceOpts.append(" -Dhive.zookeeper.quorum=") - .append(spec.zookeeper().quorum()); + serviceOpts.append(" -D") + .append(ConfigUtils.HIVE_ZOOKEEPER_QUORUM_KEY) + .append("=").append(spec.zookeeper().quorum()); } envVars.add(new EnvVar("SERVICE_OPTS", serviceOpts.toString(), null)); + int hs2ThriftPort = ConfigUtils.getInt( + hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_THRIFT_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_THRIFT_PORT_DEFAULT); + int hs2WebUiPort = ConfigUtils.getInt( + hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_WEBUI_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_WEBUI_PORT_DEFAULT); List ports = List.of( new ContainerPortBuilder() .withName("thrift") - .withContainerPort(hs2.thriftPort()).build(), + .withContainerPort(hs2ThriftPort).build(), new ContainerPortBuilder() .withName("webui") - .withContainerPort(hs2.webUiPort()).build() + .withContainerPort(hs2WebUiPort).build() ); - Probe readinessProbe = buildTcpProbe(hs2.thriftPort(), hs2.readinessProbe(), 15, 10, 3); - Probe livenessProbe = buildTcpProbe(hs2.thriftPort(), 
hs2.livenessProbe(), 120, 30, 10); + Probe readinessProbe = buildTcpProbe(hs2ThriftPort, hs2.readinessProbe(), 15, 10, 3); + Probe livenessProbe = buildTcpProbe(hs2ThriftPort, hs2.livenessProbe(), 120, 30, 10); boolean tezAmEnabled = spec.tezAm().isEnabled(); @@ -208,6 +230,9 @@ protected Deployment desired(HiveCluster hiveCluster, .endSpec() .build(); + applySpreadAffinityIfAbsent( + deployment.getSpec().getTemplate().getSpec(), selectorLabels); + if (spec.volumes() != null) { deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java index 065524b17753..a9707ac0dfa6 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java @@ -22,15 +22,17 @@ import io.fabric8.kubernetes.api.model.Service; import io.fabric8.kubernetes.api.model.ServiceBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.Labels; /** Manages the Kubernetes Service for HiveServer2 (Thrift and WebUI ports). 
*/ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=hiveserver2," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class HiveServer2ServiceDependent extends HiveDependentResource { @@ -43,6 +45,12 @@ public HiveServer2ServiceDependent() { protected Service desired(HiveCluster hiveCluster, Context context) { HiveServer2Spec hs2 = hiveCluster.getSpec().hiveServer2(); + int thriftPort = ConfigUtils.getInt(hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_THRIFT_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_THRIFT_PORT_DEFAULT); + int webUiPort = ConfigUtils.getInt(hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_WEBUI_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_WEBUI_PORT_DEFAULT); return new ServiceBuilder() .withNewMetadata() @@ -57,13 +65,13 @@ protected Service desired(HiveCluster hiveCluster, HiveServer2DeploymentDependent.COMPONENT)) .addNewPort() .withName("thrift") - .withPort(hs2.thriftPort()) - .withTargetPort(new IntOrString(hs2.thriftPort())) + .withPort(thriftPort) + .withTargetPort(new IntOrString(thriftPort)) .endPort() .addNewPort() .withName("webui") - .withPort(hs2.webUiPort()) - .withTargetPort(new IntOrString(hs2.webUiPort())) + .withPort(webUiPort) + .withTargetPort(new IntOrString(webUiPort)) .endPort() .endSpec() .build(); diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java index f945f0a75f23..2ad6955dadb8 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java @@ -23,6 +23,7 @@ import 
io.fabric8.kubernetes.api.model.ConfigMap; import io.fabric8.kubernetes.api.model.ConfigMapBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; @@ -31,8 +32,8 @@ /** Manages the llap-daemon-site.xml ConfigMap for LLAP daemons. */ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=llap," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class LlapConfigMapDependent extends HiveDependentResource { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java index 30d5933a1b4f..108f29347a97 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java @@ -22,6 +22,7 @@ import io.fabric8.kubernetes.api.model.Service; import io.fabric8.kubernetes.api.model.ServiceBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.util.Labels; @@ -31,8 +32,8 @@ * Required by the StatefulSet for stable DNS entries and ZooKeeper registration. 
*/ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=llap," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class LlapServiceDependent extends HiveDependentResource { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java index d36b0ad0d4d1..c8c044d22ce9 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java @@ -30,6 +30,7 @@ import io.fabric8.kubernetes.api.model.apps.StatefulSet; import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; @@ -43,8 +44,8 @@ * Uses StatefulSet for stable pod identities required by ZooKeeper registration. 
*/ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=llap," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class LlapStatefulSetDependent extends HiveDependentResource { @@ -155,6 +156,9 @@ protected StatefulSet desired(HiveCluster hiveCluster, .endSpec() .build(); + applySpreadAffinityIfAbsent( + statefulSet.getSpec().getTemplate().getSpec(), selectorLabels); + if (spec.volumes() != null) { statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java index eab7dc5536e4..b429335f76e0 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java @@ -23,6 +23,7 @@ import io.fabric8.kubernetes.api.model.ConfigMap; import io.fabric8.kubernetes.api.model.ConfigMapBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; @@ -31,8 +32,8 @@ /** Manages the metastore-site.xml ConfigMap for the Hive Metastore. 
*/ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=metastore," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class MetastoreConfigMapDependent extends HiveDependentResource { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java index f42091c58b5e..46a95426c969 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java @@ -32,18 +32,20 @@ import io.fabric8.kubernetes.api.model.apps.Deployment; import io.fabric8.kubernetes.api.model.apps.DeploymentBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; import org.apache.hive.kubernetes.operator.util.Labels; /** Manages the Kubernetes Deployment for the Hive Metastore. 
*/ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=metastore," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class MetastoreDeploymentDependent extends HiveDependentResource { @@ -70,15 +72,20 @@ protected Deployment desired(HiveCluster hiveCluster, envVars.addAll(spec.envVars()); } + int thriftPort = ConfigUtils.getInt( + spec.metastore().configOverrides(), + ConfigUtils.METASTORE_THRIFT_PORT_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_HIVE_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_DEFAULT); List ports = List.of( new ContainerPortBuilder() - .withName("thrift").withContainerPort(9083).build(), + .withName("thrift").withContainerPort(thriftPort).build(), new ContainerPortBuilder() .withName("rest").withContainerPort(9001).build() ); - Probe readinessProbe = buildTcpProbe(9083, spec.metastore().readinessProbe(), 15, 10, 3); - Probe livenessProbe = buildTcpProbe(9083, spec.metastore().livenessProbe(), 60, 30, 5); + Probe readinessProbe = buildTcpProbe(thriftPort, spec.metastore().readinessProbe(), 15, 10, 3); + Probe livenessProbe = buildTcpProbe(thriftPort, spec.metastore().livenessProbe(), 60, 30, 5); List initContainers = new ArrayList<>(); List volumeMounts = new ArrayList<>(); @@ -145,6 +152,9 @@ protected Deployment desired(HiveCluster hiveCluster, .endSpec() .build(); + applySpreadAffinityIfAbsent( + deployment.getSpec().getTemplate().getSpec(), selectorLabels); + if (spec.volumes() != null) { deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java index cf0537897512..2620a24e01d7 100644 --- 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java @@ -22,14 +22,16 @@ import io.fabric8.kubernetes.api.model.Service; import io.fabric8.kubernetes.api.model.ServiceBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.Labels; /** Manages the Kubernetes Service for the Hive Metastore (Thrift + REST ports). */ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=metastore," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class MetastoreServiceDependent extends HiveDependentResource { @@ -41,6 +43,11 @@ public MetastoreServiceDependent() { @Override protected Service desired(HiveCluster hiveCluster, Context context) { + int thriftPort = ConfigUtils.getInt( + hiveCluster.getSpec().metastore().configOverrides(), + ConfigUtils.METASTORE_THRIFT_PORT_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_HIVE_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_DEFAULT); return new ServiceBuilder() .withNewMetadata() .withName(hiveCluster.getMetadata().getName() + "-metastore") @@ -54,8 +61,8 @@ protected Service desired(HiveCluster hiveCluster, MetastoreDeploymentDependent.COMPONENT)) .addNewPort() .withName("thrift") - .withPort(9083) - .withTargetPort(new IntOrString(9083)) + .withPort(thriftPort) + .withTargetPort(new IntOrString(thriftPort)) .endPort() .addNewPort() .withName("rest") diff --git 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java index edc0d386f75a..a23c0c477436 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java @@ -28,6 +28,7 @@ import io.fabric8.kubernetes.api.model.batch.v1.Job; import io.fabric8.kubernetes.api.model.batch.v1.JobBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; @@ -40,8 +41,8 @@ * database schema using schematool. 
*/ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=schema-init," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=schema-init," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class SchemaInitJobDependent extends HiveDependentResource { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java index 9b1a64af73db..6a645f043574 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java @@ -24,6 +24,7 @@ import io.fabric8.kubernetes.api.model.PersistentVolumeClaimBuilder; import io.fabric8.kubernetes.api.model.Quantity; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.spec.TezAmSpec; @@ -42,8 +43,8 @@ * simultaneously. 
*/ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=scratch," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=scratch," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class ScratchPvcDependent extends HiveDependentResource { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java index adbe40ce4717..781685286038 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java @@ -21,6 +21,7 @@ import io.fabric8.kubernetes.api.model.Service; import io.fabric8.kubernetes.api.model.ServiceBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.util.Labels; @@ -31,8 +32,8 @@ * HiveServer2 can resolve TezAM pod hostnames for RPC communication. 
*/ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=tezam," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=tezam," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class TezAmServiceDependent extends HiveDependentResource { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java index a037baae6050..5cc7a3f800f3 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java @@ -27,6 +27,7 @@ import io.fabric8.kubernetes.api.model.apps.StatefulSet; import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; @@ -43,8 +44,8 @@ * so the hostname must be resolvable within the cluster. 
*/ @KubernetesDependent( - labelSelector = "app.kubernetes.io/component=tezam," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator" + informer = @Informer(labelSelector = "app.kubernetes.io/component=tezam," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public class TezAmStatefulSetDependent extends HiveDependentResource { @@ -153,6 +154,9 @@ protected StatefulSet desired(HiveCluster hiveCluster, .endSpec() .build(); + applySpreadAffinityIfAbsent( + statefulSet.getSpec().getTemplate().getSpec(), selectorLabels); + if (spec.volumes() != null) { statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveCluster.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveCluster.java index 6a708e7c8c91..f3887c0e518b 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveCluster.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveCluster.java @@ -30,7 +30,7 @@ * on Kubernetes. It manages Metastore, HiveServer2, LLAP daemons, and Tez AM. 
*/ @Group("hive.apache.org") -@Version("v1alpha1") +@Version("v1") @Kind("HiveCluster") @ShortNames("hc") public class HiveCluster diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java index 8d9e17049899..40dd8a771203 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java @@ -19,10 +19,12 @@ package org.apache.hive.kubernetes.operator.model; import java.util.List; +import java.util.Objects; import com.fasterxml.jackson.annotation.JsonPropertyDescription; import io.fabric8.crd.generator.annotation.PreserveUnknownFields; import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.generator.annotation.Required; import io.fabric8.kubernetes.api.model.EnvVar; import io.fabric8.kubernetes.api.model.Volume; import io.fabric8.kubernetes.api.model.VolumeMount; @@ -43,10 +45,11 @@ public record HiveClusterSpec( MetastoreSpec metastore, @JsonPropertyDescription("HiveServer2 component configuration") HiveServer2Spec hiveServer2, - @JsonPropertyDescription("LLAP daemon configuration. Disabled by default.") + @JsonPropertyDescription("LLAP daemon configuration. Enabled by default.") LlapSpec llap, - @JsonPropertyDescription("Tez Application Master configuration. Disabled by default.") + @JsonPropertyDescription("Tez Application Master configuration. Enabled by default.") TezAmSpec tezAm, + @Required @JsonPropertyDescription( "External ZooKeeper connection details (not managed by this operator)") ZookeeperSpec zookeeper, @@ -73,18 +76,8 @@ public record HiveClusterSpec( List volumeMounts) { public HiveClusterSpec { - image = image != null ? image : "apache/hive:4.3.0-SNAPSHOT"; - imagePullPolicy = imagePullPolicy != null ? 
imagePullPolicy : "IfNotPresent"; - metastore = metastore != null ? - metastore : - new MetastoreSpec(null, null, null, null, null, null, null, null, null, null, null); - hiveServer2 = hiveServer2 != null ? - hiveServer2 : - new HiveServer2Spec(null, null, null, null, null, null, null, null, null, null, null); - llap = llap != null ? llap : new LlapSpec(null, null, null, null, null, null, null, null, null, null); - tezAm = tezAm != null ? tezAm : new TezAmSpec(null, null, null, null, null, null, null, null); - zookeeper = zookeeper != null ? zookeeper : new ZookeeperSpec(null); - hadoop = hadoop != null ? hadoop : new HadoopSpec(null); + Objects.requireNonNull(zookeeper, + "zookeeper must be provided in the HiveCluster spec"); envVars = envVars != null ? envVars : List.of(); externalJars = externalJars != null ? externalJars : List.of(); volumes = volumes != null ? volumes : List.of(); diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/DatabaseConfig.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/DatabaseConfig.java index 15181b0526c5..a93b4684bd02 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/DatabaseConfig.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/DatabaseConfig.java @@ -19,10 +19,12 @@ package org.apache.hive.kubernetes.operator.model.spec; import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.generator.annotation.Default; /** JDBC database connection configuration for the Hive Metastore backend. 
*/ public record DatabaseConfig( @JsonPropertyDescription("Database type: derby, mysql, postgres, mssql, or oracle") + @Default("derby") String type, @JsonPropertyDescription("JDBC connection URL") String url, @@ -39,8 +41,6 @@ public record DatabaseConfig( String driverJarUrl) { public DatabaseConfig { - if (type == null) { - type = "derby"; - } + type = type != null ? type : "derby"; } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java index 6b888d42f6c9..78164fb32de6 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java @@ -24,12 +24,14 @@ import com.fasterxml.jackson.annotation.JsonPropertyDescription; import io.fabric8.crd.generator.annotation.PreserveUnknownFields; import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.generator.annotation.Default; import io.fabric8.kubernetes.api.model.Volume; import io.fabric8.kubernetes.api.model.VolumeMount; /** Configuration for the HiveServer2 component. 
*/ public record HiveServer2Spec( @JsonPropertyDescription("Number of replicas") + @Default("1") Integer replicas, @JsonPropertyDescription("Resource requirements for pods") ResourceRequirementsSpec resources, @@ -42,11 +44,8 @@ public record HiveServer2Spec( @SchemaFrom(type = Object[].class) @PreserveUnknownFields List extraVolumeMounts, @JsonPropertyDescription("Kubernetes Service type: ClusterIP, LoadBalancer, or NodePort") + @Default("ClusterIP") String serviceType, - @JsonPropertyDescription("HiveServer2 Thrift port") - Integer thriftPort, - @JsonPropertyDescription("HiveServer2 Web UI port") - Integer webUiPort, @JsonPropertyDescription("List of URIs to external JARs to download and add to HS2 classpath ") List externalJars, @JsonPropertyDescription("Readiness probe configuration") @@ -57,8 +56,6 @@ public record HiveServer2Spec( public HiveServer2Spec { replicas = replicas != null ? replicas : 1; serviceType = serviceType != null ? serviceType : "ClusterIP"; - thriftPort = thriftPort != null ? thriftPort : 10000; - webUiPort = webUiPort != null ? webUiPort : 10002; extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); externalJars = externalJars != null ? 
externalJars : List.of(); diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java index c9648c2f411f..17ff5967ff9a 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java @@ -24,12 +24,14 @@ import com.fasterxml.jackson.annotation.JsonPropertyDescription; import io.fabric8.crd.generator.annotation.PreserveUnknownFields; import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.generator.annotation.Default; import io.fabric8.kubernetes.api.model.Volume; import io.fabric8.kubernetes.api.model.VolumeMount; /** Configuration for LLAP (Live Long and Process) daemons. */ public record LlapSpec( @JsonPropertyDescription("Number of replicas") + @Default("1") Integer replicas, @JsonPropertyDescription("Resource requirements for pods") ResourceRequirementsSpec resources, @@ -42,10 +44,13 @@ public record LlapSpec( @SchemaFrom(type = Object[].class) @PreserveUnknownFields List extraVolumeMounts, @JsonPropertyDescription("Whether LLAP is enabled") + @Default("true") Boolean enabled, @JsonPropertyDescription("Number of LLAP executors per daemon") + @Default("1") Integer executors, @JsonPropertyDescription("Memory in MB per LLAP daemon instance") + @Default("1024") Integer memoryMb, @JsonPropertyDescription("LLAP service hosts identifier for ZooKeeper registration") String serviceHosts, @@ -54,9 +59,9 @@ public record LlapSpec( public LlapSpec { replicas = replicas != null ? replicas : 1; - enabled = enabled != null ? enabled : false; + enabled = enabled != null ? enabled : true; executors = executors != null ? executors : 1; - memoryMb = memoryMb != null ? memoryMb : 2048; + memoryMb = memoryMb != null ? memoryMb : 1024; serviceHosts = serviceHosts != null ? 
serviceHosts : "@llap0"; extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java index 684fbc9fe2e5..307c17221ee7 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java @@ -24,12 +24,14 @@ import com.fasterxml.jackson.annotation.JsonPropertyDescription; import io.fabric8.crd.generator.annotation.PreserveUnknownFields; import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.generator.annotation.Default; import io.fabric8.kubernetes.api.model.Volume; import io.fabric8.kubernetes.api.model.VolumeMount; /** Configuration for the Hive Metastore component. */ public record MetastoreSpec( @JsonPropertyDescription("Number of replicas") + @Default("1") Integer replicas, @JsonPropertyDescription("Resource requirements for pods") ResourceRequirementsSpec resources, @@ -44,8 +46,10 @@ public record MetastoreSpec( @JsonPropertyDescription("Database connection configuration for the metastore backend") DatabaseConfig database, @JsonPropertyDescription("Warehouse directory path") + @Default("/hive/warehouse") String warehouseDir, @JsonPropertyDescription("Whether the operator should deploy and manage a Metastore") + @Default("true") Boolean enabled, @JsonPropertyDescription("Thrift URI of the external Metastore (if enabled is false)") String externalUri, @@ -56,8 +60,9 @@ public record MetastoreSpec( public MetastoreSpec { replicas = replicas != null ? replicas : 1; - database = database != null ? database : new DatabaseConfig(null, null, null, null, null, null); - warehouseDir = warehouseDir != null ? 
warehouseDir : "/opt/hive/data/warehouse"; + database = database != null ? database : new DatabaseConfig( + "derby", null, null, null, null, null); + warehouseDir = warehouseDir != null ? warehouseDir : "/hive/warehouse"; enabled = enabled != null ? enabled : true; extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ResourceRequirementsSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ResourceRequirementsSpec.java index feff4afa1357..b7b10934bc77 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ResourceRequirementsSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ResourceRequirementsSpec.java @@ -19,12 +19,15 @@ package org.apache.hive.kubernetes.operator.model.spec; import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.generator.annotation.Default; /** Kubernetes resource requirements specification for CPU and memory. */ public record ResourceRequirementsSpec( @JsonPropertyDescription("CPU request (e.g. 500m, 1)") + @Default("500m") String requestsCpu, @JsonPropertyDescription("Memory request (e.g. 1Gi, 512Mi)") + @Default("1Gi") String requestsMemory, @JsonPropertyDescription("CPU limit (e.g. 2, 1000m)") String limitsCpu, @@ -32,11 +35,7 @@ public record ResourceRequirementsSpec( String limitsMemory) { public ResourceRequirementsSpec { - if (requestsCpu == null) { - requestsCpu = "500m"; - } - if (requestsMemory == null) { - requestsMemory = "1Gi"; - } + requestsCpu = requestsCpu != null ? requestsCpu : "500m"; + requestsMemory = requestsMemory != null ? 
requestsMemory : "1Gi"; } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java index cbb41481c264..a0494c2c5e73 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java @@ -24,12 +24,14 @@ import com.fasterxml.jackson.annotation.JsonPropertyDescription; import io.fabric8.crd.generator.annotation.PreserveUnknownFields; import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.generator.annotation.Default; import io.fabric8.kubernetes.api.model.Volume; import io.fabric8.kubernetes.api.model.VolumeMount; /** Configuration for the Tez Application Master component. */ public record TezAmSpec( @JsonPropertyDescription("Number of replicas") + @Default("1") Integer replicas, @JsonPropertyDescription("Resource requirements for pods") ResourceRequirementsSpec resources, @@ -42,9 +44,11 @@ public record TezAmSpec( @SchemaFrom(type = Object[].class) @PreserveUnknownFields List extraVolumeMounts, @JsonPropertyDescription("Whether Tez AM is enabled") + @Default("true") Boolean enabled, @JsonPropertyDescription("Storage size for the shared scratch PVC " + "(ReadWriteMany) mounted on HS2 and TezAM at /opt/hive/scratch") + @Default("1Gi") String scratchStorageSize, @JsonPropertyDescription("StorageClass for the shared scratch PVC. " + "Must support ReadWriteMany access. If null, uses cluster default.") @@ -52,7 +56,7 @@ public record TezAmSpec( public TezAmSpec { replicas = replicas != null ? replicas : 1; - enabled = enabled != null ? enabled : false; + enabled = enabled != null ? enabled : true; scratchStorageSize = scratchStorageSize != null ? scratchStorageSize : "1Gi"; extraVolumes = extraVolumes != null ? 
extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ZookeeperSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ZookeeperSpec.java index 3161c08b81c7..a33908ae38df 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ZookeeperSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ZookeeperSpec.java @@ -18,16 +18,19 @@ package org.apache.hive.kubernetes.operator.model.spec; +import java.util.Objects; + import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.generator.annotation.Required; /** External ZooKeeper connection configuration. ZooKeeper is not managed by this operator. */ public record ZookeeperSpec( - @JsonPropertyDescription("ZooKeeper quorum connection string") + @Required + @JsonPropertyDescription("ZooKeeper quorum connection string. 
This field is strictly required.") String quorum) { public ZookeeperSpec { - if (quorum == null) { - quorum = "zookeeper:2181"; - } + Objects.requireNonNull(quorum, + "ZooKeeper quorum must be explicitly defined in the HiveCluster spec."); } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java index c2319adc1f24..20332cb4127c 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java @@ -32,10 +32,10 @@ import io.fabric8.kubernetes.api.model.batch.v1.Job; import io.javaoperatorsdk.operator.api.reconciler.Context; import io.javaoperatorsdk.operator.api.reconciler.ControllerConfiguration; -import io.javaoperatorsdk.operator.api.reconciler.ErrorStatusHandler; import io.javaoperatorsdk.operator.api.reconciler.ErrorStatusUpdateControl; import io.javaoperatorsdk.operator.api.reconciler.Reconciler; import io.javaoperatorsdk.operator.api.reconciler.UpdateControl; +import io.javaoperatorsdk.operator.api.reconciler.Workflow; import io.javaoperatorsdk.operator.api.reconciler.dependent.Dependent; import org.apache.hive.kubernetes.operator.dependent.HadoopConfigMapDependent; import org.apache.hive.kubernetes.operator.dependent.HiveServer2ConfigMapDependent; @@ -67,105 +67,49 @@ * Main reconciler for the HiveCluster custom resource. * Orchestrates all dependent resources with proper dependency ordering. 
*/ -@ControllerConfiguration( - dependents = { - // --- ConfigMap dependents --- - @Dependent( - name = "hadoop-configmap", - type = HadoopConfigMapDependent.class - ), - @Dependent( - name = "metastore-configmap", - type = MetastoreConfigMapDependent.class, - activationCondition = MetastoreEnabledCondition.class - ), - @Dependent( - name = "hiveserver2-configmap", - type = HiveServer2ConfigMapDependent.class - ), - // --- Job dependents --- - @Dependent( - name = "schema-init-job", - type = SchemaInitJobDependent.class, - dependsOn = {"metastore-configmap", "hadoop-configmap"}, - readyPostcondition = SchemaJobCompletedCondition.class, - activationCondition = MetastoreEnabledCondition.class - ), - // --- Deployment dependents --- - @Dependent( - name = "metastore-deployment", - type = MetastoreDeploymentDependent.class, - dependsOn = {"schema-init-job"}, - readyPostcondition = MetastoreReadyCondition.class, - activationCondition = MetastoreEnabledCondition.class - ), - // --- Service dependents --- - @Dependent( - name = "metastore-service", - type = MetastoreServiceDependent.class, - dependsOn = {"metastore-configmap"}, - activationCondition = MetastoreEnabledCondition.class - ), - @Dependent( - name = "hiveserver2-deployment", - type = HiveServer2DeploymentDependent.class, - dependsOn = {"hiveserver2-configmap", - "hadoop-configmap"}, - reconcilePrecondition = HiveServer2Precondition.class - ), - @Dependent( - name = "hiveserver2-service", - type = HiveServer2ServiceDependent.class, - dependsOn = {"hiveserver2-configmap"} - ), - // --- LLAP (conditional) --- - @Dependent( - name = "llap-configmap", - type = LlapConfigMapDependent.class, - activationCondition = LlapEnabledCondition.class - ), - @Dependent( - name = "llap-statefulset", - type = LlapStatefulSetDependent.class, - dependsOn = {"llap-configmap", "hadoop-configmap"}, - activationCondition = LlapEnabledCondition.class - ), - @Dependent( - name = "llap-service", - type = LlapServiceDependent.class, - 
activationCondition = LlapEnabledCondition.class - ), - // --- TezAM (conditional) --- - @Dependent( - name = "scratch-pvc", - type = ScratchPvcDependent.class, - activationCondition = TezAmEnabledCondition.class - ), - @Dependent( - name = "tezam-service", - type = TezAmServiceDependent.class, - activationCondition = TezAmEnabledCondition.class - ), - @Dependent( - name = "tezam-statefulset", - type = TezAmStatefulSetDependent.class, - dependsOn = {"hiveserver2-configmap", "hadoop-configmap", - "tezam-service", "scratch-pvc"}, - activationCondition = TezAmEnabledCondition.class - ) - } -) -public class HiveClusterReconciler - implements Reconciler, ErrorStatusHandler { - - private static final Logger LOG = - LoggerFactory.getLogger(HiveClusterReconciler.class); +@ControllerConfiguration +@Workflow(dependents = { + // --- ConfigMap dependents --- + @Dependent(name = "hadoop-configmap", type = HadoopConfigMapDependent.class), + @Dependent(name = "metastore-configmap", type = MetastoreConfigMapDependent.class, + activationCondition = MetastoreEnabledCondition.class), + @Dependent(name = "hiveserver2-configmap", type = HiveServer2ConfigMapDependent.class), + // --- Job dependents --- + @Dependent(name = "schema-init-job", type = SchemaInitJobDependent.class, dependsOn = {"metastore-configmap", + "hadoop-configmap"}, readyPostcondition = SchemaJobCompletedCondition.class, + activationCondition = MetastoreEnabledCondition.class), + // --- Deployment dependents --- + @Dependent(name = "metastore-deployment", type = MetastoreDeploymentDependent.class, dependsOn = { + "schema-init-job"}, readyPostcondition = MetastoreReadyCondition.class, + activationCondition = MetastoreEnabledCondition.class), + // --- Service dependents --- + @Dependent(name = "metastore-service", type = MetastoreServiceDependent.class, dependsOn = { + "metastore-configmap"}, activationCondition = MetastoreEnabledCondition.class), + @Dependent(name = "hiveserver2-deployment", type = 
HiveServer2DeploymentDependent.class, dependsOn = { + "hiveserver2-configmap", "hadoop-configmap"}, reconcilePrecondition = HiveServer2Precondition.class), + @Dependent(name = "hiveserver2-service", type = HiveServer2ServiceDependent.class, dependsOn = { + "hiveserver2-configmap"}), + // --- LLAP (conditional) --- + @Dependent(name = "llap-configmap", type = LlapConfigMapDependent.class, + activationCondition = LlapEnabledCondition.class), + @Dependent(name = "llap-statefulset", type = LlapStatefulSetDependent.class, dependsOn = {"llap-configmap", + "hadoop-configmap"}, activationCondition = LlapEnabledCondition.class), + @Dependent(name = "llap-service", type = LlapServiceDependent.class, + activationCondition = LlapEnabledCondition.class), + // --- TezAM (conditional) --- + @Dependent(name = "scratch-pvc", type = ScratchPvcDependent.class, + activationCondition = TezAmEnabledCondition.class), + @Dependent(name = "tezam-service", type = TezAmServiceDependent.class, + activationCondition = TezAmEnabledCondition.class), + @Dependent(name = "tezam-statefulset", type = TezAmStatefulSetDependent.class, dependsOn = {"hiveserver2-configmap", + "hadoop-configmap", "tezam-service", "scratch-pvc"}, activationCondition = TezAmEnabledCondition.class)}) +public class HiveClusterReconciler implements Reconciler { + + private static final Logger LOG = LoggerFactory.getLogger(HiveClusterReconciler.class); @Override - public UpdateControl reconcile(HiveCluster resource, - Context context) { - LOG.info("Reconciling HiveCluster: {}/{}", - resource.getMetadata().getNamespace(), + public UpdateControl reconcile(HiveCluster resource, Context context) { + LOG.debug("Reconciling HiveCluster: {}/{}", resource.getMetadata().getNamespace(), resource.getMetadata().getName()); HiveClusterStatus existingStatus = resource.getStatus(); @@ -180,17 +124,15 @@ public UpdateControl reconcile(HiveCluster resource, } @Override - public ErrorStatusUpdateControl updateErrorStatus( - HiveCluster 
resource, Context context, Exception e) { - LOG.error("Error reconciling HiveCluster: {}/{}", - resource.getMetadata().getNamespace(), + public ErrorStatusUpdateControl updateErrorStatus(HiveCluster resource, Context context, + Exception e) { + LOG.error("Error reconciling HiveCluster: {}/{}", resource.getMetadata().getNamespace(), resource.getMetadata().getName(), e); - HiveClusterStatus status = resource.getStatus() != null - ? resource.getStatus() : new HiveClusterStatus(); + HiveClusterStatus status = resource.getStatus() != null ? resource.getStatus() : new HiveClusterStatus(); - List existingConditions = status.getConditions() != null - ? status.getConditions() : Collections.emptyList(); + List existingConditions = + status.getConditions() != null ? status.getConditions() : Collections.emptyList(); status.setConditions(List.of( buildCondition("Ready", "False", "ReconciliationError", diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java new file mode 100644 index 000000000000..0f86201817e7 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.util; + +import java.util.Map; + +public final class ConfigUtils { + + private ConfigUtils() { + } + + public static final String METASTORE_THRIFT_PORT_KEY = "metastore.thrift.port"; + public static final String METASTORE_THRIFT_PORT_HIVE_KEY = "hive.metastore.port"; + public static final int METASTORE_THRIFT_PORT_DEFAULT = 9083; + + public static final String METASTORE_WAREHOUSE_KEY = "metastore.warehouse.dir"; + + public static final String METASTORE_CONNECTION_URL_KEY = "javax.jdo.option.ConnectionURL"; + + public static final String METASTORE_CONNECTION_DRIVER_KEY = "javax.jdo.option.ConnectionDriverName"; + + public static final String METASTORE_CONNECTION_USER_KEY = "javax.jdo.option.ConnectionUserName"; + + public static final String METASTORE_URIS_KEY = "hive.metastore.uris"; + + public static final String HIVE_METASTORE_WAREHOUSE_KEY = "hive.metastore.warehouse.dir"; + + public static final String HIVE_SERVER2_ENABLE_DOAS_KEY = "hive.server2.enable.doAs"; + + public static final String HIVE_TEZ_EXEC_INPLACE_PROGRESS_KEY = "hive.tez.exec.inplace.progress"; + + public static final String HIVE_TEZ_EXEC_SUMMARY_KEY = "hive.tez.exec.print.summary"; + + public static final String HIVE_JAR_DIRECTORY_KEY = "hive.jar.directory"; + + public static final String HIVE_USER_INSTALL_DIR_KEY = "hive.user.install.directory"; + + public static final String HIVE_LOCAL_SCRATCH_DIR_KEY = "hive.exec.local.scratchdir"; + + public static final String HIVE_SERVER2_TEZ_USE_EXTERNAL_SESSIONS_KEY = "hive.server2.tez.use.external.sessions"; + + public static final String HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_NAMESPACE_KEY = + "hive.server2.tez.external.sessions.namespace"; + + public static final String HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_REGISTRY_CLASS_KEY = + "hive.server2.tez.external.sessions.registry.class"; + + public static 
final String HIVE_ZOOKEEPER_QUORUM_KEY = "hive.zookeeper.quorum"; + + public static final String HIVE_EXECUTION_MODE_KEY = "hive.execution.mode"; + + public static final String HIVE_LLAP_EXECUTION_MODE_KEY = "hive.llap.execution.mode"; + + public static final String HIVE_LLAP_DAEMON_SERVICE_HOSTS_KEY = "hive.llap.daemon.service.hosts"; + + public static final String HIVE_LLAP_DAEMON_MEMORY_MB_KEY = "hive.llap.daemon.memory.per.instance.mb"; + + public static final String HIVE_LLAP_DAEMON_NUM_EXECUTORS_KEY = "hive.llap.daemon.num.executors"; + + public static final String HIVE_METASTORE_URIS_KEY = "hive.metastore.uris"; + + public static final String HIVE_SERVER2_THRIFT_PORT_KEY = "hive.server2.thrift.port"; + public static final int HIVE_SERVER2_THRIFT_PORT_DEFAULT = 10000; + + public static final String HIVE_SERVER2_WEBUI_PORT_KEY = "hive.server2.webui.port"; + public static final int HIVE_SERVER2_WEBUI_PORT_DEFAULT = 10002; + + public static final String TEZ_AM_SESSION_MODE_KEY = "tez.am.mode.session"; + + public static final String TEZ_IGNORE_LIB_URIS_KEY = "tez.ignore.lib.uris"; + + public static final String TEZ_AM_WEBSERVICE_ENABLE_KEY = "tez.am.webservice.enable"; + + public static final String TEZ_AM_DISABLE_CLIENT_VERSION_CHECK_KEY = "tez.am.disable.client-version-check"; + + public static final String TEZ_SESSION_AM_DAG_SUBMIT_TIMEOUT_SECS_KEY = "tez.session.am.dag.submit.timeout.secs"; + + public static final String TEZ_LOCAL_MODE_KEY = "tez.local.mode"; + + /** tez.am.framework.mode - only available in Tez 1.0.0+ */ + public static final String TEZ_AM_FRAMEWORK_MODE_KEY = "tez.am.framework.mode"; + + /** tez.am.registry.namespace - only available in Tez 1.0.0+ */ + public static final String TEZ_AM_REGISTRY_NAMESPACE_KEY = "tez.am.registry.namespace"; + + /** tez.am.zookeeper.quorum - only available in Tez 1.0.0+ */ + public static final String TEZ_AM_ZOOKEEPER_QUORUM_KEY = "tez.am.zookeeper.quorum"; + + public static int getInt(Map overrides, + String 
key, String altKey, int defaultVal) { + if (overrides != null) { + String val = overrides.get(key); + if (val == null && altKey != null) { + val = overrides.get(altKey); + } + if (val != null) { + return Integer.parseInt(val); + } + } + return defaultVal; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java index 2e506febf132..5db24e95d3f3 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java @@ -45,43 +45,53 @@ public static Map getHiveServer2HiveSite( boolean tezAmEnabled = spec.tezAm().isEnabled(); String zkQuorum = spec.zookeeper().quorum(); + int metastorePort = ConfigUtils.getInt( + spec.metastore().configOverrides(), + ConfigUtils.METASTORE_THRIFT_PORT_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_HIVE_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_DEFAULT); String metastoreUri = spec.metastore().isEnabled() - ? "thrift://" + hiveCluster.getMetadata().getName() + "-metastore:9083" + ? 
"thrift://" + hiveCluster.getMetadata().getName() + + "-metastore:" + metastorePort : spec.metastore().externalUri(); if (metastoreUri != null && !metastoreUri.isEmpty()) { - props.put("hive.metastore.uris", metastoreUri); + props.put(ConfigUtils.METASTORE_URIS_KEY, metastoreUri); } - props.put("hive.metastore.warehouse.dir", spec.metastore().warehouseDir()); - props.put("hive.server2.enable.doAs", "false"); - props.put("hive.tez.exec.inplace.progress", "false"); - props.put("hive.tez.exec.print.summary", "true"); - props.put("hive.jar.directory", "/tmp"); - props.put("hive.user.install.directory", "/tmp"); + props.put(ConfigUtils.HIVE_METASTORE_WAREHOUSE_KEY, + spec.metastore().warehouseDir()); + props.put(ConfigUtils.HIVE_SERVER2_ENABLE_DOAS_KEY, "false"); + props.put(ConfigUtils.HIVE_TEZ_EXEC_INPLACE_PROGRESS_KEY, "false"); + props.put(ConfigUtils.HIVE_TEZ_EXEC_SUMMARY_KEY, "true"); + props.put(ConfigUtils.HIVE_JAR_DIRECTORY_KEY, "/tmp"); + props.put(ConfigUtils.HIVE_USER_INSTALL_DIR_KEY, "/tmp"); if (tezAmEnabled) { - props.put("hive.exec.local.scratchdir", "/opt/hive/scratch"); + props.put(ConfigUtils.HIVE_LOCAL_SCRATCH_DIR_KEY, + "/opt/hive/scratch"); } if (tezAmEnabled) { - props.put("hive.server2.tez.use.external.sessions", "true"); - props.put("hive.server2.tez.external.sessions.namespace", + props.put(ConfigUtils.HIVE_SERVER2_TEZ_USE_EXTERNAL_SESSIONS_KEY, "true"); + props.put(ConfigUtils.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_NAMESPACE_KEY, "/tez-external-sessions/tez_am/server"); - props.put("hive.server2.tez.external.sessions.registry.class", - "org.apache.hadoop.hive.ql.exec.tez." 
- + "ZookeeperExternalSessionsRegistryClient"); - props.put("hive.zookeeper.quorum", zkQuorum); - props.put("tez.am.framework.mode", "STANDALONE_ZOOKEEPER"); - props.put("tez.am.registry.namespace", "/tez_am/server"); - props.put("tez.am.zookeeper.quorum", zkQuorum); + props.put(ConfigUtils.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_REGISTRY_CLASS_KEY, + "org.apache.hadoop.hive.ql.exec.tez.ZookeeperExternalSessionsRegistryClient"); + props.put(ConfigUtils.HIVE_ZOOKEEPER_QUORUM_KEY, zkQuorum); + // tez.am.framework.mode, tez.am.registry.namespace, tez.am.zookeeper.quorum + // are only in Tez 1.0.0+ + props.put(ConfigUtils.TEZ_AM_FRAMEWORK_MODE_KEY, "STANDALONE_ZOOKEEPER"); + props.put(ConfigUtils.TEZ_AM_REGISTRY_NAMESPACE_KEY, "/tez_am/server"); + props.put(ConfigUtils.TEZ_AM_ZOOKEEPER_QUORUM_KEY, zkQuorum); LlapSpec llap = spec.llap(); if (llap.isEnabled()) { - props.put("hive.execution.mode", "llap"); - props.put("hive.llap.execution.mode", "all"); - props.put("hive.llap.daemon.service.hosts", llap.serviceHosts()); + props.put(ConfigUtils.HIVE_EXECUTION_MODE_KEY, "llap"); + props.put(ConfigUtils.HIVE_LLAP_EXECUTION_MODE_KEY, "all"); + props.put(ConfigUtils.HIVE_LLAP_DAEMON_SERVICE_HOSTS_KEY, + llap.serviceHosts()); } } else { - props.put("hive.server2.tez.use.external.sessions", "false"); - props.put("tez.local.mode", "true"); - props.put("tez.am.framework.mode", "LOCAL"); + props.put(ConfigUtils.HIVE_SERVER2_TEZ_USE_EXTERNAL_SESSIONS_KEY, "false"); + props.put(ConfigUtils.TEZ_LOCAL_MODE_KEY, "true"); + props.put(ConfigUtils.TEZ_AM_FRAMEWORK_MODE_KEY, "LOCAL"); props.put("mapreduce.framework.name", "local"); } @@ -97,24 +107,25 @@ public static Map getTezSite(HiveClusterSpec spec) { String zkQuorum = spec.zookeeper().quorum(); Map tezProps = new LinkedHashMap<>(); - tezProps.put("tez.am.mode.session", "true"); - tezProps.put("tez.ignore.lib.uris", "true"); - tezProps.put("tez.am.tez-ui.webservice.enable", "false"); - tezProps.put("tez.am.disable.client-version-check", 
"true"); - tezProps.put("tez.session.am.dag.submit.timeout.secs", "-1"); - tezProps.put("tez.am.zookeeper.quorum", zkQuorum); - tezProps.put("hive.zookeeper.quorum", zkQuorum); + tezProps.put(ConfigUtils.TEZ_AM_SESSION_MODE_KEY, "true"); + tezProps.put(ConfigUtils.TEZ_IGNORE_LIB_URIS_KEY, "true"); + tezProps.put(ConfigUtils.TEZ_AM_WEBSERVICE_ENABLE_KEY, "false"); + tezProps.put(ConfigUtils.TEZ_AM_DISABLE_CLIENT_VERSION_CHECK_KEY, "true"); + tezProps.put(ConfigUtils.TEZ_SESSION_AM_DAG_SUBMIT_TIMEOUT_SECS_KEY, "-1"); + tezProps.put(ConfigUtils.TEZ_AM_ZOOKEEPER_QUORUM_KEY, zkQuorum); + tezProps.put(ConfigUtils.HIVE_ZOOKEEPER_QUORUM_KEY, zkQuorum); if (tezAmEnabled) { - tezProps.put("tez.local.mode", "false"); - tezProps.put("tez.am.framework.mode", "STANDALONE_ZOOKEEPER"); - tezProps.put("tez.am.registry.namespace", "/tez_am/server"); + tezProps.put(ConfigUtils.TEZ_LOCAL_MODE_KEY, "false"); + tezProps.put(ConfigUtils.TEZ_AM_FRAMEWORK_MODE_KEY, "STANDALONE_ZOOKEEPER"); + tezProps.put(ConfigUtils.TEZ_AM_REGISTRY_NAMESPACE_KEY, "/tez_am/server"); } else { - tezProps.put("tez.local.mode", "true"); + tezProps.put(ConfigUtils.TEZ_LOCAL_MODE_KEY, "true"); } LlapSpec llap = spec.llap(); if (llap.isEnabled()) { - tezProps.put("hive.llap.daemon.service.hosts", llap.serviceHosts()); + tezProps.put(ConfigUtils.HIVE_LLAP_DAEMON_SERVICE_HOSTS_KEY, + llap.serviceHosts()); } if (spec.tezAm().configOverrides() != null) { @@ -138,18 +149,19 @@ public static Map getMetastoreSite(HiveClusterSpec spec) { MetastoreSpec metastore = spec.metastore(); Map props = new LinkedHashMap<>(); - props.put("metastore.warehouse.dir", metastore.warehouseDir()); + props.put(ConfigUtils.METASTORE_WAREHOUSE_KEY, + metastore.warehouseDir()); DatabaseConfig db = metastore.database(); if (db != null) { if (db.url() != null) { - props.put("javax.jdo.option.ConnectionURL", db.url()); + props.put(ConfigUtils.METASTORE_CONNECTION_URL_KEY, db.url()); } if (db.driver() != null) { - 
props.put("javax.jdo.option.ConnectionDriverName", db.driver()); + props.put(ConfigUtils.METASTORE_CONNECTION_DRIVER_KEY, db.driver()); } if (db.username() != null) { - props.put("javax.jdo.option.ConnectionUserName", db.username()); + props.put(ConfigUtils.METASTORE_CONNECTION_USER_KEY, db.username()); } } @@ -164,12 +176,14 @@ public static Map getLlapDaemonSite(HiveClusterSpec spec) { LlapSpec llap = spec.llap(); Map props = new LinkedHashMap<>(); - props.put("hive.llap.daemon.memory.per.instance.mb", + props.put(ConfigUtils.HIVE_LLAP_DAEMON_MEMORY_MB_KEY, String.valueOf(llap.memoryMb())); - props.put("hive.llap.daemon.num.executors", + props.put(ConfigUtils.HIVE_LLAP_DAEMON_NUM_EXECUTORS_KEY, String.valueOf(llap.executors())); - props.put("hive.llap.daemon.service.hosts", llap.serviceHosts()); - props.put("hive.zookeeper.quorum", spec.zookeeper().quorum()); + props.put(ConfigUtils.HIVE_LLAP_DAEMON_SERVICE_HOSTS_KEY, + llap.serviceHosts()); + props.put(ConfigUtils.HIVE_ZOOKEEPER_QUORUM_KEY, + spec.zookeeper().quorum()); if (llap.configOverrides() != null) { props.putAll(llap.configOverrides()); diff --git a/pom.xml b/pom.xml index 9682ad8e5b89..4481607165db 100644 --- a/pom.xml +++ b/pom.xml @@ -99,8 +99,8 @@ 3.1.0 2.16.0 3.6.0 - 4.9.6 - 6.13.4 + 5.3.4 + 7.7.0 3.5.3 2.7.10 2.3.0