Skip to content

Commit e65a9e1

Browse files
authored
[fs] Add filesystem support for Huawei OBS Storage (#5417)
1 parent 4362b79 commit e65a9e1

File tree

16 files changed

+1272
-0
lines changed

16 files changed

+1272
-0
lines changed

docs/content/maintenance/filesystems.md

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ FileSystem pluggable jars for user to query tables from Spark/Hive side.
4242
| Aliyun OSS | oss:// | Y | |
4343
| S3 | s3:// | Y | |
4444
| Tencent Cloud Object Storage | cosn:// | Y | |
45+
| Huawei OBS | obs:// | Y | |
4546

4647
## Dependency
4748

@@ -514,3 +515,85 @@ spark-sql \
514515
{{< /tab >}}
515516

516517
{{< /tabs >}}
518+
519+
## OBS
520+
521+
{{< stable >}}
522+
523+
Download [paimon-obs-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-obs/{{< version >}}/paimon-obs-{{< version >}}.jar).
524+
525+
{{< /stable >}}
526+
527+
{{< unstable >}}
528+
529+
Download [paimon-obs-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-obs/{{< version >}}/).
530+
531+
{{< /unstable >}}
532+
533+
{{< tabs "obs" >}}
534+
535+
{{< tab "Flink" >}}
536+
537+
{{< hint info >}}
538+
If you have already configured [obs access through Flink](https://nightlies.apache.org/flink/flink-docs-stable/docs/deployment/filesystems/overview/) (via Flink FileSystem),
539+
here you can skip the following configuration.
540+
{{< /hint >}}
541+
542+
Put `paimon-obs-{{< version >}}.jar` into `lib` directory of your Flink home, and create catalog:
543+
544+
```sql
545+
CREATE CATALOG my_catalog WITH (
546+
'type' = 'paimon',
547+
'warehouse' = 'obs://<bucket>/<path>',
548+
'fs.obs.endpoint' = 'obs-endpoint-hostname',
549+
'fs.obs.access.key' = 'xxx',
550+
'fs.obs.secret.key' = 'yyy'
551+
);
552+
```
553+
554+
{{< /tab >}}
555+
556+
{{< tab "Spark" >}}
557+
558+
{{< hint info >}}
559+
If you have already configured obs access through Spark (via Hadoop FileSystem), here you can skip the following configuration.
560+
{{< /hint >}}
561+
562+
Place `paimon-obs-{{< version >}}.jar` together with `paimon-spark-{{< version >}}.jar` under Spark's jars directory, and start like
563+
564+
```shell
565+
spark-sql \
566+
--conf spark.sql.catalog.paimon=org.apache.paimon.spark.SparkCatalog \
567+
--conf spark.sql.catalog.paimon.warehouse=obs://<bucket>/<path> \
568+
--conf spark.sql.catalog.paimon.fs.obs.endpoint=obs-endpoint-hostname \
569+
--conf spark.sql.catalog.paimon.fs.obs.access.key=xxx \
570+
--conf spark.sql.catalog.paimon.fs.obs.secret.key=yyy
571+
```
572+
573+
{{< /tab >}}
574+
575+
{{< tab "Hive" >}}
576+
577+
{{< hint info >}}
578+
If you have already configured obs access through Hive (via Hadoop FileSystem), here you can skip the following configuration.
579+
{{< /hint >}}
580+
581+
NOTE: You need to ensure that Hive metastore can access `obs`.
582+
583+
Place `paimon-obs-{{< version >}}.jar` together with `paimon-hive-connector-{{< version >}}.jar` under Hive's auxlib directory, and start like
584+
585+
```sql
586+
SET paimon.fs.obs.endpoint=obs-endpoint-hostname;
587+
SET paimon.fs.obs.access.key=xxx;
588+
SET paimon.fs.obs.secret.key=yyy;
589+
```
590+
591+
Then you can read the table from the Hive metastore. The table can be created by Flink or Spark, see [Catalog with Hive Metastore]({{< ref "flink/sql-ddl" >}})
592+
```sql
593+
SELECT * FROM test_table;
594+
SELECT COUNT(1) FROM test_table;
595+
```
596+
597+
{{< /tab >}}
598+
599+
{{< /tabs >}}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
~ Licensed to the Apache Software Foundation (ASF) under one
4+
~ or more contributor license agreements. See the NOTICE file
5+
~ distributed with this work for additional information
6+
~ regarding copyright ownership. The ASF licenses this file
7+
~ to you under the Apache License, Version 2.0 (the
8+
~ "License"); you may not use this file except in compliance
9+
~ with the License. You may obtain a copy of the License at
10+
~
11+
~ http://www.apache.org/licenses/LICENSE-2.0
12+
~
13+
~ Unless required by applicable law or agreed to in writing, software
14+
~ distributed under the License is distributed on an "AS IS" BASIS,
15+
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
~ See the License for the specific language governing permissions and
17+
~ limitations under the License.
18+
-->
19+
<project xmlns="http://maven.apache.org/POM/4.0.0"
20+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
21+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
22+
<modelVersion>4.0.0</modelVersion>
23+
<parent>
24+
<groupId>org.apache.paimon</groupId>
25+
<artifactId>paimon-filesystems</artifactId>
26+
<version>1.1-SNAPSHOT</version>
27+
</parent>
28+
29+
<artifactId>paimon-obs-impl</artifactId>
30+
<name>Paimon : FileSystems : OBS : Impl</name>
31+
<packaging>jar</packaging>
32+
33+
<properties>
34+
<fs.oss.sdk.version>3.13.2</fs.oss.sdk.version>
35+
<hadoop.huaweicloud.version>3.4.0</hadoop.huaweicloud.version>
36+
</properties>
37+
<dependencies>
38+
<dependency>
39+
<groupId>org.apache.paimon</groupId>
40+
<artifactId>paimon-hadoop-shaded</artifactId>
41+
<version>${project.version}</version>
42+
</dependency>
43+
44+
<dependency>
45+
<groupId>org.apache.paimon</groupId>
46+
<artifactId>paimon-common</artifactId>
47+
<version>${project.version}</version>
48+
<scope>provided</scope>
49+
</dependency>
50+
<dependency>
51+
<groupId>org.apache.hadoop</groupId>
52+
<artifactId>hadoop-huaweicloud</artifactId>
53+
<version>${hadoop.huaweicloud.version}</version>
54+
<exclusions>
55+
<exclusion>
56+
<!-- provided by paimon-hadoop-shaded -->
57+
<groupId>org.apache.hadoop</groupId>
58+
<artifactId>hadoop-common</artifactId>
59+
</exclusion>
60+
<exclusion>
61+
<groupId>ch.qos.reload4j</groupId>
62+
<artifactId>reload4j</artifactId>
63+
</exclusion>
64+
<exclusion>
65+
<groupId>org.slf4j</groupId>
66+
<artifactId>slf4j-reload4j</artifactId>
67+
</exclusion>
68+
</exclusions>
69+
</dependency>
70+
71+
<dependency>
72+
<!-- Hadoop requires jaxb-api for javax.xml.bind.JAXBException -->
73+
<groupId>javax.xml.bind</groupId>
74+
<artifactId>jaxb-api</artifactId>
75+
<version>${jaxb.api.version}</version>
76+
<!-- packaged as an optional dependency that is only accessible on Java 11+ -->
77+
<scope>provided</scope>
78+
</dependency>
79+
</dependencies>
80+
81+
<build>
82+
<plugins>
83+
84+
<!-- Relocate all OSS related classes -->
85+
<plugin>
86+
<groupId>org.apache.maven.plugins</groupId>
87+
<artifactId>maven-shade-plugin</artifactId>
88+
<executions>
89+
<execution>
90+
<id>shade-paimon</id>
91+
<phase>package</phase>
92+
<goals>
93+
<goal>shade</goal>
94+
</goals>
95+
<configuration>
96+
<artifactSet>
97+
<includes>
98+
<include>*:*</include>
99+
</includes>
100+
</artifactSet>
101+
<filters>
102+
<filter>
103+
<artifact>*</artifact>
104+
<excludes>
105+
<exclude>.gitkeep</exclude>
106+
<exclude>mime.types</exclude>
107+
<exclude>mozilla/**</exclude>
108+
<exclude>okhttp3/internal/publicsuffix/NOTICE</exclude>
109+
</excludes>
110+
</filter>
111+
</filters>
112+
</configuration>
113+
</execution>
114+
</executions>
115+
</plugin>
116+
</plugins>
117+
</build>
118+
</project>

0 commit comments

Comments
 (0)