diff --git a/conf/zeppelin-env.cmd.template b/conf/zeppelin-env.cmd.template index 15c88fd4ca8..db972faaf71 100644 --- a/conf/zeppelin-env.cmd.template +++ b/conf/zeppelin-env.cmd.template @@ -38,6 +38,8 @@ REM set ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID REM AWS KMS key ID REM set ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION REM AWS KMS key region REM set ZEPPELIN_NOTEBOOK_S3_SSE REM Server-side encryption enabled for notebooks REM set ZEPPELIN_NOTEBOOK_S3_PATH_STYLE_ACCESS REM Path style access for S3 bucket +REM set ZEPPELIN_CONFIG_STORAGE_CLASS REM Configuration persistence layer implementation +REM set ZEPPELIN_CONFIG_FS_DIR REM Path for interpreter.json, notebook-authorization.json, and credentials.json REM set ZEPPELIN_IDENT_STRING REM A string representing this instance of zeppelin. $USER by default. REM set ZEPPELIN_NICENESS REM The scheduling priority for daemons. Defaults to 0. REM set ZEPPELIN_INTERPRETER_LOCALREPO REM Local repository for interpreter's additional dependency loading diff --git a/conf/zeppelin-env.sh.template b/conf/zeppelin-env.sh.template index e8160b563c9..d3a0ec90d4d 100644 --- a/conf/zeppelin-env.sh.template +++ b/conf/zeppelin-env.sh.template @@ -46,6 +46,9 @@ # export ZEPPELIN_NOTEBOOK_S3_SSE # Server-side encryption enabled for notebooks # export ZEPPELIN_NOTEBOOK_S3_PATH_STYLE_ACCESS # Path style access for S3 bucket +# export ZEPPELIN_CONFIG_STORAGE_CLASS # Configuration persistence layer implementation +# export ZEPPELIN_CONFIG_FS_DIR # Path for interpreter.json, notebook-authorization.json, and credentials.json + # export ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR # GCS "directory" (prefix) under which notebooks are saved. E.g. 
gs://example-bucket/path/to/dir # export GOOGLE_APPLICATION_CREDENTIALS # Provide a service account key file for GCS and BigQuery API calls (overrides application default credentials) diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template index d5e54b91f16..e410a1948e8 100755 --- a/conf/zeppelin-site.xml.template +++ b/conf/zeppelin-site.xml.template @@ -165,6 +165,26 @@ --> + + + + + + + diff --git a/docs/setup/operation/configuration.md b/docs/setup/operation/configuration.md index 9588cd25a5b..6ef3850c1b0 100644 --- a/docs/setup/operation/configuration.md +++ b/docs/setup/operation/configuration.md @@ -316,6 +316,18 @@ Sources descending by priority: Optional override to control which signature algorithm should be used to sign AWS requests + +
ZEPPELIN_CONFIG_STORAGE_CLASS
+
zeppelin.config.storage.class
+ org.apache.zeppelin.storage.LocalConfigStorage + Configuration persistence layer implementation for interpreter.json, notebook-authorization.json, and credentials.json + + +
ZEPPELIN_CONFIG_FS_DIR
+
zeppelin.config.fs.dir
+ + Path for FileSystemConfigStorage, for example hdfs://... or s3a://bucket/prefix. S3A requires hadoop-aws and compatible AWS SDK jars on the Zeppelin server classpath. +
ZEPPELIN_NOTEBOOK_AZURE_CONNECTION_STRING
zeppelin.notebook.azure.connectionString
diff --git a/docs/setup/storage/configuration_storage.md b/docs/setup/storage/configuration_storage.md index 3a5bbff9dfb..fa62462dc88 100644 --- a/docs/setup/storage/configuration_storage.md +++ b/docs/setup/storage/configuration_storage.md @@ -27,7 +27,41 @@ limitations under the License. Zeppelin has lots of configuration which is stored in files: - `interpreter.json` (This file contains all the interpreter setting info) - `notebook-authorization.json` (This file contains all the note authorization info) -- `credential.json` (This file contains the credential info) +- `credentials.json` (This file contains the credential info) + +## Configuration Storage in S3-compatible object storage + +Zeppelin can persist configuration state in S3-compatible object storage by reusing +`FileSystemConfigStorage` with Hadoop S3A. This keeps configuration storage on the +same Hadoop-compatible storage abstraction used for HDFS and other filesystems. + +Set the following properties in `zeppelin-site.xml`: + +```xml + + zeppelin.config.storage.class + org.apache.zeppelin.storage.FileSystemConfigStorage + configuration persistence layer implementation + + + zeppelin.config.fs.dir + s3a://bucket_name/user/config + S3A path for Zeppelin configuration files + +``` + +Also ensure the Zeppelin server classpath contains the Hadoop S3A runtime: +`hadoop-aws` built for the same Hadoop version as Zeppelin, plus its compatible +AWS SDK dependencies. `HADOOP_CONF_DIR` is still required so Zeppelin can find +the Hadoop configuration files that define S3A credentials, endpoint, encryption, +and bucket policy settings. For example, configure properties such as +`fs.s3a.aws.credentials.provider`, `fs.s3a.endpoint`, +`fs.s3a.server-side-encryption-algorithm`, and +`fs.s3a.server-side-encryption.key` in your Hadoop configuration as needed. + +S3A does not enforce POSIX permissions on objects. 
When credentials persistence is +enabled, protect `credentials.json` with S3 bucket policy, object ownership, and +encryption settings instead of relying on owner-only file permissions. ## Configuration Storage in hadoop compatible file system @@ -62,4 +96,4 @@ By default, Zeppelin stores configuration on the local file system. path on local file system -``` \ No newline at end of file +``` diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java index 6c9692dd8ff..421b6f211f4 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java @@ -27,15 +27,14 @@ import org.apache.zeppelin.util.ReflectionUtils; import java.io.IOException; - import java.util.List; + /** * Interface for storing zeppelin configuration. * - * 1. interpreter-setting.json - * 2. helium.json - * 3. notebook-authorization.json - * 4. credentials.json + * 1. interpreter.json + * 2. notebook-authorization.json + * 3. 
credentials.json * */ public abstract class ConfigStorage { @@ -51,7 +50,6 @@ public static ConfigStorage createConfigStorage(ZeppelinConfiguration zConf) thr new Class[] {ZeppelinConfiguration.class}, new Object[] {zConf}); } - protected ConfigStorage(ZeppelinConfiguration zConf) { this.zConf = zConf; } diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java index ac7d108f27b..f4937961c7e 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java @@ -41,6 +41,7 @@ public class FileSystemConfigStorage extends ConfigStorage { private static final Logger LOGGER = LoggerFactory.getLogger(FileSystemConfigStorage.class); + private static final String S3A = "s3a"; private FileSystemStorage fs; private Path interpreterSettingPath; @@ -55,6 +56,10 @@ public FileSystemConfigStorage(ZeppelinConfiguration zConf) throws IOException { Path configPath = this.fs.makeQualified(new Path(configDir)); this.fs.tryMkDir(configPath); LOGGER.info("Using folder {} to store Zeppelin Config", configPath); + if (zConf.credentialsPersist() && S3A.equalsIgnoreCase(configPath.toUri().getScheme())) { + LOGGER.warn("S3A does not enforce POSIX file permissions. Protect {} with S3 bucket policy, " + + "object ownership, and encryption settings.", zConf.getCredentialsPath(false)); + } this.interpreterSettingPath = fs.makeQualified(new Path(zConf.getInterpreterSettingPath(false))); this.authorizationPath = fs.makeQualified(new Path(zConf.getNotebookAuthorizationPath(false))); this.credentialPath = fs.makeQualified(new Path(zConf.getCredentialsPath(false)));