Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions conf/zeppelin-env.cmd.template
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ REM set ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID REM AWS KMS key ID
REM set ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION REM AWS KMS key region
REM set ZEPPELIN_NOTEBOOK_S3_SSE REM Server-side encryption enabled for notebooks
REM set ZEPPELIN_NOTEBOOK_S3_PATH_STYLE_ACCESS REM Path style access for S3 bucket
REM set ZEPPELIN_CONFIG_STORAGE_CLASS REM Configuration persistence layer implementation
REM set ZEPPELIN_CONFIG_FS_DIR REM Path for interpreter.json, notebook-authorization.json, and credentials.json
REM set ZEPPELIN_IDENT_STRING REM A string representing this instance of zeppelin. $USER by default.
REM set ZEPPELIN_NICENESS REM The scheduling priority for daemons. Defaults to 0.
REM set ZEPPELIN_INTERPRETER_LOCALREPO REM Local repository for interpreter's additional dependency loading
Expand Down
3 changes: 3 additions & 0 deletions conf/zeppelin-env.sh.template
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@
# export ZEPPELIN_NOTEBOOK_S3_SSE # Server-side encryption enabled for notebooks
# export ZEPPELIN_NOTEBOOK_S3_PATH_STYLE_ACCESS # Path style access for S3 bucket

# export ZEPPELIN_CONFIG_STORAGE_CLASS # Configuration persistence layer implementation
# export ZEPPELIN_CONFIG_FS_DIR # Path for interpreter.json, notebook-authorization.json, and credentials.json

# export ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR # GCS "directory" (prefix) under which notebooks are saved. E.g. gs://example-bucket/path/to/dir
# export GOOGLE_APPLICATION_CREDENTIALS # Provide a service account key file for GCS and BigQuery API calls (overrides application default credentials)

Expand Down
20 changes: 20 additions & 0 deletions conf/zeppelin-site.xml.template
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,26 @@

-->

<!-- Amazon S3 configuration storage through Hadoop S3A -->
<!-- Stores interpreter.json, notebook-authorization.json, and credentials.json under s3a://{bucket}/{prefix}. -->
<!-- Configure S3A credentials, endpoint, and encryption through Hadoop configuration; manage the bucket policy on the S3 side. -->
<!-- Requires hadoop-aws and compatible AWS SDK jars on the Zeppelin server classpath. -->
<!-- S3A does not enforce POSIX permissions; protect credentials.json with bucket policy/object ownership. -->
<!--
<property>
<name>zeppelin.config.storage.class</name>
<value>org.apache.zeppelin.storage.FileSystemConfigStorage</value>
<description>configuration persistence layer implementation</description>
</property>

<property>
<name>zeppelin.config.fs.dir</name>
<value>s3a://zeppelin/user/config</value>
<description>S3A path for Zeppelin configuration files</description>
</property>

-->

<!-- Additionally, encryption is supported for notebook data stored in S3 -->
<!-- Use the AWS KMS to encrypt data -->
<!-- If used, the EC2 role assigned to the EMR cluster must have rights to use the given key -->
Expand Down
12 changes: 12 additions & 0 deletions docs/setup/operation/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,18 @@ Sources descending by priority:
<td></td>
<td>Optional override to control which signature algorithm should be used to sign AWS requests</td>
</tr>
<tr>
<td><h6 class="properties">ZEPPELIN_CONFIG_STORAGE_CLASS</h6></td>
<td><h6 class="properties">zeppelin.config.storage.class</h6></td>
<td>org.apache.zeppelin.storage.LocalConfigStorage</td>
<td>Configuration persistence layer implementation for <code>interpreter.json</code>, <code>notebook-authorization.json</code>, and <code>credentials.json</code></td>
</tr>
<tr>
<td><h6 class="properties">ZEPPELIN_CONFIG_FS_DIR</h6></td>
<td><h6 class="properties">zeppelin.config.fs.dir</h6></td>
<td></td>
<td>Path for <code>FileSystemConfigStorage</code>, for example <code>hdfs://...</code> or <code>s3a://bucket/prefix</code>. S3A requires <code>hadoop-aws</code> and compatible AWS SDK jars on the Zeppelin server classpath.</td>
</tr>
<tr>
<td><h6 class="properties">ZEPPELIN_NOTEBOOK_AZURE_CONNECTION_STRING</h6></td>
<td><h6 class="properties">zeppelin.notebook.azure.connectionString</h6></td>
Expand Down
38 changes: 36 additions & 2 deletions docs/setup/storage/configuration_storage.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,41 @@ limitations under the License.
Zeppelin stores much of its configuration in the following files:
- `interpreter.json` (This file contains all the interpreter setting info)
- `notebook-authorization.json` (This file contains all the note authorization info)
- `credential.json` (This file contains the credential info)
- `credentials.json` (This file contains the credential info)

## Configuration Storage in S3-compatible object storage

Zeppelin can persist configuration state in S3-compatible object storage by reusing
`FileSystemConfigStorage` with Hadoop S3A. This keeps configuration storage on the
same Hadoop-compatible storage abstraction used for HDFS and other filesystems.

Set the following properties in `zeppelin-site.xml`:

```xml
<property>
<name>zeppelin.config.storage.class</name>
<value>org.apache.zeppelin.storage.FileSystemConfigStorage</value>
<description>configuration persistence layer implementation</description>
</property>
<property>
<name>zeppelin.config.fs.dir</name>
<value>s3a://bucket_name/user/config</value>
<description>S3A path for Zeppelin configuration files</description>
</property>
```

Also ensure the Zeppelin server classpath contains the Hadoop S3A runtime:
`hadoop-aws` built for the same Hadoop version as Zeppelin, plus its compatible
AWS SDK dependencies. `HADOOP_CONF_DIR` is still required so Zeppelin can find
the Hadoop configuration files that define the S3A credentials, endpoint, and
encryption settings. For example, configure properties such as
`fs.s3a.aws.credentials.provider`, `fs.s3a.endpoint`,
`fs.s3a.server-side-encryption-algorithm`, and
`fs.s3a.server-side-encryption.key` in your Hadoop configuration as needed.
Bucket policy is not a Hadoop setting; manage it on the S3 side.

S3A does not enforce POSIX permissions on objects. When credentials persistence is
enabled, protect `credentials.json` with S3 bucket policy, object ownership, and
encryption settings instead of relying on owner-only file permissions.

## Configuration Storage in hadoop compatible file system

Expand Down Expand Up @@ -62,4 +96,4 @@ By default, zeppelin store configuration on local file system.
<value></value>
<description>path on local file system</description>
</property>
```
```
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,14 @@
import org.apache.zeppelin.util.ReflectionUtils;

import java.io.IOException;

import java.util.List;

/**
* Interface for storing zeppelin configuration.
*
* 1. interpreter-setting.json
* 2. helium.json
* 3. notebook-authorization.json
* 4. credentials.json
* 1. interpreter.json
* 2. notebook-authorization.json
* 3. credentials.json
*
*/
public abstract class ConfigStorage {
Expand All @@ -51,7 +50,6 @@ public static ConfigStorage createConfigStorage(ZeppelinConfiguration zConf) thr
new Class[] {ZeppelinConfiguration.class}, new Object[] {zConf});
}


protected ConfigStorage(ZeppelinConfiguration zConf) {
this.zConf = zConf;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
public class FileSystemConfigStorage extends ConfigStorage {

private static final Logger LOGGER = LoggerFactory.getLogger(FileSystemConfigStorage.class);
private static final String S3A = "s3a";

private FileSystemStorage fs;
private Path interpreterSettingPath;
Expand All @@ -55,6 +56,10 @@ public FileSystemConfigStorage(ZeppelinConfiguration zConf) throws IOException {
Path configPath = this.fs.makeQualified(new Path(configDir));
this.fs.tryMkDir(configPath);
LOGGER.info("Using folder {} to store Zeppelin Config", configPath);
if (zConf.credentialsPersist() && S3A.equalsIgnoreCase(configPath.toUri().getScheme())) {
LOGGER.warn("S3A does not enforce POSIX file permissions. Protect {} with S3 bucket policy, "
+ "object ownership, and encryption settings.", zConf.getCredentialsPath(false));
}
this.interpreterSettingPath = fs.makeQualified(new Path(zConf.getInterpreterSettingPath(false)));
this.authorizationPath = fs.makeQualified(new Path(zConf.getNotebookAuthorizationPath(false)));
this.credentialPath = fs.makeQualified(new Path(zConf.getCredentialsPath(false)));
Expand Down
Loading