# PIPELINE DEFINITION
# Name: cats-vs-dogs-classification-from-minio
# Description: A pipeline to train and evaluate a cats-vs-dogs classifier using dataset from MinIO.
# Inputs:
#    batch_size: int [Default: 32.0]
#    bucket_name: str [Default: 'xrwang']
#    epochs: int [Default: 10.0]
#    local_zip_path: str [Default: '/tmp/cat-dog_data.zip']
#    minio_access_key: str [Default: 'LEINAOYUNOS']
#    minio_endpoint: str [Default: 'miniogw-dev2.cnbita.com:11443']
#    minio_secret_key: str [Default: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYLEINAOYUNKEY']
#    object_name: str [Default: 'catdog/cat-dog_data.zip']
#    secure: bool [Default: True]
# Outputs:
#    evaluate-model-metrics: system.Metrics

# Component interfaces: for each component, the executor that runs it and the
# types of its input/output parameters and artifacts.
components:
  # Takes MinIO connection/object parameters and produces a Dataset artifact.
  comp-download-and-prepare-data-from-minio:
    executorLabel: exec-download-and-prepare-data-from-minio
    inputDefinitions:
      parameters:
        bucket_name:
          parameterType: STRING
        local_zip_path:
          parameterType: STRING
        minio_access_key:
          parameterType: STRING
        minio_endpoint:
          parameterType: STRING
        minio_secret_key:
          parameterType: STRING
        object_name:
          parameterType: STRING
        secure:
          defaultValue: true
          isOptional: true
          parameterType: BOOLEAN
    outputDefinitions:
      artifacts:
        dataset_output:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
  # Takes a Dataset and a Model artifact and produces a Metrics artifact.
  comp-evaluate-model:
    executorLabel: exec-evaluate-model
    inputDefinitions:
      artifacts:
        dataset_input:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
        model_dir:
          artifactType:
            schemaTitle: system.Model
            schemaVersion: 0.0.1
    outputDefinitions:
      artifacts:
        metrics:
          artifactType:
            schemaTitle: system.Metrics
            schemaVersion: 0.0.1
  # Takes a Dataset artifact plus batch_size/epochs and produces a Model artifact.
  comp-train-model:
    executorLabel: exec-train-model
    inputDefinitions:
      artifacts:
        dataset_input:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
      parameters:
        batch_size:
          parameterType: NUMBER_INTEGER
        epochs:
          parameterType: NUMBER_INTEGER
    outputDefinitions:
      artifacts:
        model_output:
          artifactType:
            schemaTitle: system.Model
            schemaVersion: 0.0.1
# Root location under which pipeline artifacts are stored.
defaultPipelineRoot: minio://xrwang/artifacts/
# Executor definitions: one container spec per component.
#
# Every executor uses the same three-stage command:
#   1. `sh -c`  : make sure pip exists, install kfp plus the step's packages,
#                 then exec the remaining command words ("$0" "$@").
#   2. `sh -ec` : write the embedded Python source (passed as $0) to a temp
#                 file and run it through kfp.dsl.executor_main.
#   3. the Python source of the component function itself.
# The scripts are kept as literal block scalars so that their indentation,
# which is significant for Python, survives editing.
deploymentSpec:
  executors:
    # Downloads a zip object from MinIO and extracts it into the Dataset
    # output artifact, then deletes the local zip.
    exec-download-and-prepare-data-from-minio:
      container:
        args:
        - --executor_input
        - "{{$}}"
        - --function_to_execute
        - download_and_prepare_data_from_minio
        command:
        - sh
        - -c
        # 'pathlib' and 'zipfile36' are intentionally not installed: the code
        # below imports the standard-library pathlib/zipfile modules.
        # NOTE(review): 'pandas' and 'tensorflow' are not imported by this step
        # either; consider dropping them from packages_to_install in the
        # pipeline source to speed this step up.
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && python3 -m pip install --quiet --no-warn-script-location 'minio' 'pandas' 'tensorflow' && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def download_and_prepare_data_from_minio(
              dataset_output: Output[Dataset],
              minio_endpoint: str,
              minio_access_key: str,
              minio_secret_key: str,
              bucket_name: str,
              object_name: str,
              local_zip_path: str,
              secure: bool = True
          ):
              from minio import Minio
              from zipfile import ZipFile
              from pathlib import Path
              import os
              client = Minio(
                  minio_endpoint,
                  access_key=minio_access_key,
                  secret_key=minio_secret_key,
                  secure=secure
              )
              print ("download_and_prepare_data_from_minio")
              client.fget_object(bucket_name, object_name, local_zip_path)
              with ZipFile(local_zip_path, 'r') as zip_ref:
                  zip_ref.extractall(path=dataset_output.path)
              os.remove(local_zip_path)
              print(f"Data prepared at: {dataset_output.path}")
        image: python:3.7
    # Loads the trained model, evaluates it on the 'validation' subdirectory
    # of the dataset, and logs loss/accuracy to the Metrics artifact.
    exec-evaluate-model:
      container:
        args:
        - --executor_input
        - "{{$}}"
        - --function_to_execute
        - evaluate_model
        command:
        - sh
        - -c
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && python3 -m pip install --quiet --no-warn-script-location 'tensorflow' 'pandas' && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def evaluate_model(
              model_dir: Input[Model],
              dataset_input: Input[Dataset],
              metrics: Output[Metrics]
          ):
              import tensorflow as tf
              import os

              # 加载模型
              model = tf.keras.models.load_model(model_dir.path)

              # 加载验证数据集
              validation_dir = os.path.join(dataset_input.path, 'validation')
              validation_dataset = tf.keras.utils.image_dataset_from_directory(
                  validation_dir,
                  image_size=(180, 180),
                  batch_size=32)

              # 评估模型
              eval_result = model.evaluate(validation_dataset)

              # 记录评估结果
              metrics.log_metric("loss", eval_result[0])
              metrics.log_metric("accuracy", eval_result[1])
        image: registry.cnbita.com:5000/kubeflow-pipelines/python-tensorflow:3.7
    # Builds a small CNN, trains it on the 'train' subdirectory of the dataset
    # (validating on 'validation'), and saves it to the Model output artifact.
    exec-train-model:
      container:
        args:
        - --executor_input
        - "{{$}}"
        - --function_to_execute
        - train_model
        command:
        - sh
        - -c
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && python3 -m pip install --quiet --no-warn-script-location 'tensorflow' 'numpy' && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        # batch_size is applied when the datasets are built; it is deliberately
        # not passed to model.fit, because the tf.data datasets already yield
        # batches.
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def train_model(
              dataset_input: Input[Dataset],
              model_output: Output[Model],
              epochs: int,
              batch_size: int
          ):
              import tensorflow as tf
              import os
              train_dir = os.path.join(dataset_input.path, 'train')
              validation_dir = os.path.join(dataset_input.path, 'validation')

              train_dataset = tf.keras.utils.image_dataset_from_directory(
                  train_dir,
                  image_size=(180, 180),
                  batch_size=batch_size)

              validation_dataset = tf.keras.utils.image_dataset_from_directory(
                  validation_dir,
                  image_size=(180, 180),
                  batch_size=batch_size)

              model = tf.keras.Sequential([
                  tf.keras.layers.Rescaling(1./255),
                  tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
                  tf.keras.layers.MaxPooling2D(),
                  tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
                  tf.keras.layers.MaxPooling2D(),
                  tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
                  tf.keras.layers.MaxPooling2D(),
                  tf.keras.layers.Flatten(),
                  tf.keras.layers.Dense(128, activation='relu'),
                  tf.keras.layers.Dense(2)

              ])

              model.compile(optimizer='adam',
                            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                            metrics=['accuracy'])

              model.fit(train_dataset, validation_data=validation_dataset, epochs=epochs)
              model.save(model_output.path)
        image: registry.cnbita.com:5000/kubeflow-pipelines/python-tensorflow:3.7
# Pipeline identity: human-readable description and unique name.
pipelineInfo:
  # Folded scalar: the two source lines are joined with a single space.
  description: >-
    A pipeline to train and evaluate a cats-vs-dogs classifier using dataset
    from MinIO.
  name: cats-vs-dogs-classification-from-minio
# Root DAG: wires the three tasks together and declares the pipeline-level
# inputs and outputs.
root:
  dag:
    # The pipeline's metrics output is the `metrics` artifact produced by the
    # evaluate-model task.
    outputs:
      artifacts:
        evaluate-model-metrics:
          artifactSelectors:
          - outputArtifactKey: metrics
            producerSubtask: evaluate-model
    # Each task entry states: (1) where its inputs come from, (2) which tasks
    # it depends on, and (3) its name (unique identifier).
    tasks:
      download-and-prepare-data-from-minio:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-download-and-prepare-data-from-minio
        # All inputs are forwarded from the pipeline-level parameters.
        inputs:
          parameters:
            bucket_name:
              componentInputParameter: bucket_name
            local_zip_path:
              componentInputParameter: local_zip_path
            minio_access_key:
              componentInputParameter: minio_access_key
            minio_endpoint:
              componentInputParameter: minio_endpoint
            minio_secret_key:
              componentInputParameter: minio_secret_key
            object_name:
              componentInputParameter: object_name
            secure:
              componentInputParameter: secure
        taskInfo:
          name: download-and-prepare-data-from-minio
      evaluate-model:
        cachingOptions: {}
        # Component definition backing this task (comp-evaluate-model).
        componentRef:
          name: comp-evaluate-model
        # Tasks that must complete before this one runs.
        dependentTasks:
        - download-and-prepare-data-from-minio
        - train-model
        # Which upstream task output feeds each input of this task.
        inputs:
          artifacts:
            dataset_input:
              taskOutputArtifact:
                outputArtifactKey: dataset_output
                producerTask: download-and-prepare-data-from-minio
            model_dir:
              taskOutputArtifact:
                outputArtifactKey: model_output
                producerTask: train-model
        taskInfo:
          name: evaluate-model
      train-model:
        cachingOptions: {}
        componentRef:
          name: comp-train-model
        dependentTasks:
        - download-and-prepare-data-from-minio
        # The dataset comes from the download task; the hyperparameters come
        # from the pipeline-level parameters.
        inputs:
          artifacts:
            dataset_input:
              taskOutputArtifact:
                outputArtifactKey: dataset_output
                producerTask: download-and-prepare-data-from-minio
          parameters:
            batch_size:
              componentInputParameter: batch_size
            epochs:
              componentInputParameter: epochs
        taskInfo:
          name: train-model
  # Pipeline-level parameters; every one is optional and has a default.
  inputDefinitions:
    parameters:
      batch_size:
        defaultValue: 32.0
        isOptional: true
        parameterType: NUMBER_INTEGER
      bucket_name:
        defaultValue: xrwang
        isOptional: true
        parameterType: STRING
      epochs:
        defaultValue: 10.0
        isOptional: true
        parameterType: NUMBER_INTEGER
      local_zip_path:
        defaultValue: /tmp/cat-dog_data.zip
        isOptional: true
        parameterType: STRING
      # NOTE(review): the MinIO access key and secret key below are committed
      # as plain-text defaults. If they are real credentials, rotate them and
      # supply the values at run time (e.g. from a Kubernetes secret) instead.
      minio_access_key:
        defaultValue: LEINAOYUNOS
        isOptional: true
        parameterType: STRING
      minio_endpoint:
        defaultValue: miniogw-dev2.cnbita.com:11443
        isOptional: true
        parameterType: STRING
      minio_secret_key:
        defaultValue: wJalrXUtnFEMI/K7MDENG/bPxRfiCYLEINAOYUNKEY
        isOptional: true
        parameterType: STRING
      object_name:
        defaultValue: catdog/cat-dog_data.zip
        isOptional: true
        parameterType: STRING
      secure:
        defaultValue: true
        isOptional: true
        parameterType: BOOLEAN
  outputDefinitions:
    artifacts:
      evaluate-model-metrics:
        artifactType:
          schemaTitle: system.Metrics
          schemaVersion: 0.0.1
# Pipeline spec schema version and the KFP SDK version recorded in this file.
schemaVersion: 2.1.0
sdkVersion: kfp-2.7.0