|
# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7
# check=error=true

FROM local-image/java-devel AS hadoop-builder

# Build-time parameters for this stage; STACKABLE_USER_UID is the non-root
# UID everything below is chowned to / runs as.
ARG PRODUCT_VERSION
ARG RELEASE_VERSION
ARG PROTOBUF_VERSION
ARG STACKABLE_USER_UID

WORKDIR /stackable

# Copy only the patch metadata and the patches for the pinned protobuf version,
# so unrelated patch changes do not invalidate this layer.
COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/patchable.toml /stackable/src/shared/protobuf/stackable/patches/patchable.toml
COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/${PROTOBUF_VERSION} /stackable/src/shared/protobuf/stackable/patches/${PROTOBUF_VERSION}
| 15 | + |
# Install OS-level build dependencies (as root) and prepare the protobuf
# install prefix for the non-root build user.
RUN <<EOF
# Heredoc lines are NOT &&-chained: without set -e a failed rpm/microdnf step
# would be silently ignored and the layer would still succeed.
set -e
rpm --install --replacepkgs https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
microdnf update
# boost is a build dependency starting in Hadoop 3.4.0 if compiling native code
# automake and libtool are required to build protobuf
microdnf install boost1.78-devel automake libtool
microdnf clean all
rm -rf /var/cache/yum
# Writable install prefix for the protobuf build performed as the non-root user below.
mkdir /opt/protobuf
chown ${STACKABLE_USER_UID}:0 /opt/protobuf
EOF
| 27 | + |
USER ${STACKABLE_USER_UID}
# This Protobuf version is the exact version as used in the Hadoop Dockerfile
# See https://github.com/apache/hadoop/blob/trunk/dev-support/docker/pkg-resolver/install-protobuf.sh
# (this was hardcoded in the Dockerfile in earlier versions of Hadoop, make sure to look at the exact version in Github)
RUN <<EOF
  # Abort on the first failing command. Heredoc lines are not &&-chained, so
  # without this a failed autogen/configure/make would be masked by the final
  # cleanup command, and the layer would report success.
  set -e
  cd "$(/stackable/patchable --images-repo-root=src checkout shared/protobuf ${PROTOBUF_VERSION})"

  # Create snapshot of the source code including custom patches
  tar -czf /stackable/protobuf-${PROTOBUF_VERSION}-src.tar.gz .

  ./autogen.sh
  ./configure --prefix=/opt/protobuf
  make "-j$(nproc)"
  make install
  # Remove the checked-out source tree; only /opt/protobuf and the src tarball are kept.
  (cd .. && rm -r ${PROTOBUF_VERSION})
EOF
| 44 | + |
# Make the freshly built protobuf visible to the Hadoop native build.
ENV PROTOBUF_HOME=/opt/protobuf
ENV PATH="${PATH}:/opt/protobuf/bin"

WORKDIR /build
# Patch metadata, the version-specific Hadoop patches, and the fuse_dfs wrapper
# script (moved into the dist tree after the build below).
COPY --chown=${STACKABLE_USER_UID}:0 precompiled/hadoop/stackable/patches/patchable.toml /build/src/precompiled/hadoop/stackable/patches/patchable.toml
COPY --chown=${STACKABLE_USER_UID}:0 precompiled/hadoop/stackable/patches/${PRODUCT_VERSION} /build/src/precompiled/hadoop/stackable/patches/${PRODUCT_VERSION}
COPY --chown=${STACKABLE_USER_UID}:0 precompiled/hadoop/stackable/fuse_dfs_wrapper /build
USER ${STACKABLE_USER_UID}
# Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI, so skip building this module
# Build from source to enable FUSE module, and to apply custom patches.
# Also skip building the yarn, mapreduce and minicluster modules: this will result in the modules being excluded but not all
# jar files will be stripped if they are needed elsewhere e.g. share/hadoop/yarn will not be part of the build, but yarn jars
# will still exist in share/hadoop/tools as they would be needed by the resource estimator tool. Such jars are removed in a later step.
RUN <<EOF
# Heredoc lines are NOT &&-chained: abort on the first failing command so a
# broken Maven build cannot be masked by the cleanup/rm steps further down.
set -e
cd "$(/stackable/patchable --images-repo-root=src checkout precompiled/hadoop ${PRODUCT_VERSION})"

ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
NEW_VERSION=${PRODUCT_VERSION}-stackable${RELEASE_VERSION}

mvn versions:set -DnewVersion=${NEW_VERSION}

# Since we skip building the hadoop-pipes module, we need to set the version to the original version so it can be pulled from Maven Central
sed -e '/<artifactId>hadoop-pipes<\/artifactId>/,/<\/dependency>/ { s/<version>.*<\/version>/<version>'"$ORIGINAL_VERSION"'<\/version>/ }' -i hadoop-tools/hadoop-tools-dist/pom.xml

# Create snapshot of the source code including custom patches
tar -czf /stackable/hadoop-${NEW_VERSION}-src.tar.gz .

# We do not pass require.snappy because that is only built in to the MapReduce client and we don't need that
#
# Passing require.openssl SHOULD make the build fail if OpenSSL is not present.
# This does not work properly however because this builder image contains the openssl-devel package which creates a symlink from /usr/lib64/libcrypto.so to the real version.
# Therefore, this build does work but the final image does NOT contain the openssl-devel package which is why it fails there which is why we have to create the symlink over there manually.
# We still leave this flag in to automatically fail should anything with the packages or symlinks ever fail.
mvn \
  clean package install \
  -Pdist,native \
  -pl '!hadoop-tools/hadoop-pipes' \
  -Dhadoop.version=${NEW_VERSION} \
  -Drequire.fuse=true \
  -Drequire.openssl=true \
  -DskipTests \
  -Dmaven.javadoc.skip=true

# Keep the patched org.apache.hadoop artifacts from the local Maven repository
# so they can be consumed from /stackable/patched-libs later.
mkdir -p /stackable/patched-libs/maven/org/apache
cp -r /stackable/.m2/repository/org/apache/hadoop /stackable/patched-libs/maven/org/apache

# Drop the yarn/mapreduce/minicluster parts of the dist output (see the comment above the RUN).
rm -rf hadoop-dist/target/hadoop-${NEW_VERSION}/share/hadoop/yarn
rm -rf hadoop-dist/target/hadoop-${NEW_VERSION}/share/hadoop/mapreduce
rm hadoop-dist/target/hadoop-${NEW_VERSION}/share/hadoop/client/hadoop-client-minicluster-*.jar
rm hadoop-dist/target/hadoop-${NEW_VERSION}/share/hadoop/tools/lib/hadoop-minicluster-*.jar

cp -r hadoop-dist/target/hadoop-${NEW_VERSION} /stackable/hadoop-${NEW_VERSION}
# Rewrite the generated SBOM to reference the upstream version instead of the
# -stackable suffixed one, then ship it alongside the dist.
sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" hadoop-dist/target/bom.json
mv hadoop-dist/target/bom.json /stackable/hadoop-${NEW_VERSION}/hadoop-${NEW_VERSION}.cdx.json

# HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves
cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${NEW_VERSION}/bin

# Remove source code
(cd .. && rm -r ${PRODUCT_VERSION})

ln -s /stackable/hadoop-${NEW_VERSION} /stackable/hadoop

mv /build/fuse_dfs_wrapper /stackable/hadoop/bin

# Remove unneeded binaries:
# - code sources
# - mapreduce/yarn binaries that were built as cross-project dependencies
# - minicluster (only used for testing) and test .jars
# - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
rm -rf /stackable/hadoop/share/hadoop/common/sources/
rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/
rm -rf /stackable/hadoop/share/hadoop/tools/sources/
rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar
rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar
rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar
find /stackable/hadoop -name 'hadoop-minicluster-*.jar' -type f -delete
find /stackable/hadoop -name 'hadoop-client-minicluster-*.jar' -type f -delete
find /stackable/hadoop -name 'hadoop-*tests.jar' -type f -delete
rm -rf /stackable/.m2

# Set correct groups; make sure only required artifacts for the final image are located in /stackable
chmod -R g=u /stackable
EOF
0 commit comments