# Source file: Test-wsg-003/cats_vs_dogs_pipeline(1).yaml
# (Repository viewer metadata: 335 lines, 14 KiB, YAML.)

# PIPELINE DEFINITION
# Name: cats-vs-dogs-classification-from-minio
# Description: A pipeline to train and evaluate a cats-vs-dogs classifier using dataset from MinIO.
# Inputs:
# batch_size: int [Default: 32.0]
# bucket_name: str [Default: 'xrwang']
# epochs: int [Default: 10.0]
# local_zip_path: str [Default: '/tmp/cat-dog_data.zip']
# minio_access_key: str [Default: 'LEINAOYUNOS']
# minio_endpoint: str [Default: 'miniogw-dev2.cnbita.com:11443']
# minio_secret_key: str [Default: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYLEINAOYUNKEY']
# object_name: str [Default: 'catdog/cat-dog_data.zip']
# secure: bool [Default: True]
# Outputs:
# evaluate-model-metrics: system.Metrics
# Component interface definitions: the typed inputs and outputs of every
# pipeline step. Each component names, via executorLabel, the executor under
# deploymentSpec.executors that implements it.
components:
  # Fetches a zip archive from MinIO and publishes the extracted files as a
  # Dataset artifact.
  comp-download-and-prepare-data-from-minio:
    executorLabel: exec-download-and-prepare-data-from-minio
    inputDefinitions:
      parameters:
        bucket_name:
          parameterType: STRING
        local_zip_path:
          parameterType: STRING
        minio_access_key:
          parameterType: STRING
        minio_endpoint:
          parameterType: STRING
        minio_secret_key:
          parameterType: STRING
        object_name:
          parameterType: STRING
        # The only optional input of this component; defaults to true.
        secure:
          defaultValue: true
          isOptional: true
          parameterType: BOOLEAN
    outputDefinitions:
      artifacts:
        dataset_output:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
  # Consumes the dataset and the trained model; emits a Metrics artifact.
  comp-evaluate-model:
    executorLabel: exec-evaluate-model
    inputDefinitions:
      artifacts:
        dataset_input:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
        model_dir:
          artifactType:
            schemaTitle: system.Model
            schemaVersion: 0.0.1
    outputDefinitions:
      artifacts:
        metrics:
          artifactType:
            schemaTitle: system.Metrics
            schemaVersion: 0.0.1
  # Consumes the dataset plus two integer hyperparameters; emits a Model
  # artifact.
  comp-train-model:
    executorLabel: exec-train-model
    inputDefinitions:
      artifacts:
        dataset_input:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
      parameters:
        batch_size:
          parameterType: NUMBER_INTEGER
        epochs:
          parameterType: NUMBER_INTEGER
    outputDefinitions:
      artifacts:
        model_output:
          artifactType:
            schemaTitle: system.Model
            schemaVersion: 0.0.1
# Default root location (a minio:// URI) for artifacts produced by pipeline runs.
defaultPipelineRoot: minio://xrwang/artifacts/
# Executor definitions: the container image, command and arguments that
# implement each component.
#
# Every executor runs the same three-stage command:
#   1. `sh -c`  bootstrap: make sure pip exists, install kfp plus the
#      step's packages, then re-invoke the remaining command words
#      ("$0" "$@").
#   2. `sh -ec` launcher: write the embedded Python source (passed as $0) to
#      a temporary file and run it through kfp.dsl.executor_main.
#   3. The Python source of the component function itself.
# The scripts are literal block scalars so that their line breaks and the
# Python indentation are preserved exactly.
deploymentSpec:
  executors:
    exec-download-and-prepare-data-from-minio:
      container:
        args:
        - --executor_input
        - "{{$}}"
        - --function_to_execute
        - download_and_prepare_data_from_minio
        command:
        - sh
        - -c
        # Only `minio` is installed for this step: the function below imports
        # nothing else from outside the standard library (zipfile, pathlib
        # and os ship with Python).
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \
            --no-warn-script-location 'kfp==2.7.0' '--no-deps' \
            'typing-extensions>=3.7.4,<5; python_version<"3.9"' \
            && python3 -m pip install --quiet --no-warn-script-location 'minio' \
            && "$0" "$@"
        - sh
        - -ec
        # Each statement must stay on its own line: the assignment has to
        # complete before "$program_path" is expanded in the redirect.
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main \
            --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def download_and_prepare_data_from_minio(
              dataset_output: Output[Dataset],
              minio_endpoint: str,
              minio_access_key: str,
              minio_secret_key: str,
              bucket_name: str,
              object_name: str,
              local_zip_path: str,
              secure: bool = True
          ):
              from minio import Minio
              from zipfile import ZipFile
              from pathlib import Path
              import os
              client = Minio(
                  minio_endpoint,
                  access_key=minio_access_key,
                  secret_key=minio_secret_key,
                  secure=secure
              )
              print ("download_and_prepare_data_from_minio")
              # Download the archive to local_zip_path, unpack it into the
              # output artifact directory, then delete the local archive.
              client.fget_object(bucket_name, object_name, local_zip_path)
              with ZipFile(local_zip_path, 'r') as zip_ref:
                  zip_ref.extractall(path=dataset_output.path)
              os.remove(local_zip_path)
              print(f"Data prepared at: {dataset_output.path}")
        image: python:3.7
    exec-evaluate-model:
      container:
        args:
        - --executor_input
        - "{{$}}"
        - --function_to_execute
        - evaluate_model
        command:
        - sh
        - -c
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \
            --no-warn-script-location 'kfp==2.7.0' '--no-deps' \
            'typing-extensions>=3.7.4,<5; python_version<"3.9"' \
            && python3 -m pip install --quiet --no-warn-script-location \
              'tensorflow' 'pandas' \
            && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main \
            --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def evaluate_model(
              model_dir: Input[Model],
              dataset_input: Input[Dataset],
              metrics: Output[Metrics]
          ):
              import tensorflow as tf
              import os

              # Load the model
              model = tf.keras.models.load_model(model_dir.path)

              # Load the validation dataset
              validation_dir = os.path.join(dataset_input.path, 'validation')
              validation_dataset = tf.keras.utils.image_dataset_from_directory(
                  validation_dir,
                  image_size=(180, 180),
                  batch_size=32)

              # Evaluate the model
              eval_result = model.evaluate(validation_dataset)

              # Record the evaluation results
              metrics.log_metric("loss", eval_result[0])
              metrics.log_metric("accuracy", eval_result[1])
        image: registry.cnbita.com:5000/kubeflow-pipelines/python-tensorflow:3.7
    exec-train-model:
      container:
        args:
        - --executor_input
        - "{{$}}"
        - --function_to_execute
        - train_model
        command:
        - sh
        - -c
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \
            --no-warn-script-location 'kfp==2.7.0' '--no-deps' \
            'typing-extensions>=3.7.4,<5; python_version<"3.9"' \
            && python3 -m pip install --quiet --no-warn-script-location \
              'tensorflow' 'numpy' \
            && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main \
            --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def train_model(
              dataset_input: Input[Dataset],
              model_output: Output[Model],
              epochs: int,
              batch_size: int
          ):
              import tensorflow as tf
              import os
              train_dir = os.path.join(dataset_input.path, 'train')
              validation_dir = os.path.join(dataset_input.path, 'validation')

              train_dataset = tf.keras.utils.image_dataset_from_directory(
                  train_dir,
                  image_size=(180, 180),
                  batch_size=batch_size)

              validation_dataset = tf.keras.utils.image_dataset_from_directory(
                  validation_dir,
                  image_size=(180, 180),
                  batch_size=batch_size)

              model = tf.keras.Sequential([
                  tf.keras.layers.Rescaling(1./255),
                  tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
                  tf.keras.layers.MaxPooling2D(),
                  tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
                  tf.keras.layers.MaxPooling2D(),
                  tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
                  tf.keras.layers.MaxPooling2D(),
                  tf.keras.layers.Flatten(),
                  tf.keras.layers.Dense(128, activation='relu'),
                  tf.keras.layers.Dense(2)
              ])

              model.compile(optimizer='adam',
                            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                            metrics=['accuracy'])

              # The datasets are already batched with batch_size above, so
              # batch_size is not passed to fit() again.
              model.fit(train_dataset, validation_data=validation_dataset, epochs=epochs)
              model.save(model_output.path)
        image: registry.cnbita.com:5000/kubeflow-pipelines/python-tensorflow:3.7
# Pipeline-level metadata: the pipeline's name and its human-readable
# description.
pipelineInfo:
  # Folded scalar: the two source lines are joined with a single space.
  description: >-
    A pipeline to train and evaluate a cats-vs-dogs classifier using dataset
    from MinIO.
  name: cats-vs-dogs-classification-from-minio
# Root of the pipeline: the task graph (dag), the pipeline-level input
# parameters with their defaults, and the pipeline-level output artifact.
root:
  dag:
    # The pipeline's metrics output is the `metrics` artifact produced by the
    # evaluate-model task.
    outputs:
      artifacts:
        evaluate-model-metrics:
          artifactSelectors:
          - outputArtifactKey: metrics
            producerSubtask: evaluate-model
    # Each task entry declares: (1) which component or upstream task every
    # input comes from, (2) its dependencies on other tasks, and (3) the
    # unique name that identifies it.
    tasks:
      download-and-prepare-data-from-minio:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-download-and-prepare-data-from-minio
        # All inputs are forwarded from the pipeline-level parameters of the
        # same name.
        inputs:
          parameters:
            bucket_name:
              componentInputParameter: bucket_name
            local_zip_path:
              componentInputParameter: local_zip_path
            minio_access_key:
              componentInputParameter: minio_access_key
            minio_endpoint:
              componentInputParameter: minio_endpoint
            minio_secret_key:
              componentInputParameter: minio_secret_key
            object_name:
              componentInputParameter: object_name
            secure:
              componentInputParameter: secure
        taskInfo:
          name: download-and-prepare-data-from-minio
      evaluate-model:
        cachingOptions: {}
        # The definition of this task (evaluate-model) comes from
        # comp-evaluate-model.
        componentRef:
          name: comp-evaluate-model
        # Tasks that must run before this one.
        dependentTasks:
        - download-and-prepare-data-from-minio
        - train-model
        # Which upstream task each input of this task comes from.
        inputs:
          artifacts:
            dataset_input:
              taskOutputArtifact:
                outputArtifactKey: dataset_output
                producerTask: download-and-prepare-data-from-minio
            model_dir:
              taskOutputArtifact:
                outputArtifactKey: model_output
                producerTask: train-model
        taskInfo:
          name: evaluate-model
      train-model:
        cachingOptions: {}
        componentRef:
          name: comp-train-model
        dependentTasks:
        - download-and-prepare-data-from-minio
        # The dataset comes from the download task; the hyperparameters come
        # from the pipeline-level parameters.
        inputs:
          artifacts:
            dataset_input:
              taskOutputArtifact:
                outputArtifactKey: dataset_output
                producerTask: download-and-prepare-data-from-minio
          parameters:
            batch_size:
              componentInputParameter: batch_size
            epochs:
              componentInputParameter: epochs
        taskInfo:
          name: train-model
  # Pipeline-level parameters. Every one is optional and falls back to the
  # default given here.
  inputDefinitions:
    parameters:
      batch_size:
        defaultValue: 32.0
        isOptional: true
        parameterType: NUMBER_INTEGER
      bucket_name:
        defaultValue: xrwang
        isOptional: true
        parameterType: STRING
      epochs:
        defaultValue: 10.0
        isOptional: true
        parameterType: NUMBER_INTEGER
      local_zip_path:
        defaultValue: /tmp/cat-dog_data.zip
        isOptional: true
        parameterType: STRING
      # NOTE(review): the MinIO access key and secret key below are committed
      # as plain-text defaults. They are kept so existing submissions keep
      # working, but consider passing them at run time and rotating them.
      minio_access_key:
        defaultValue: 'LEINAOYUNOS'
        isOptional: true
        parameterType: STRING
      # Quoted so the host:port value is always read as a string.
      minio_endpoint:
        defaultValue: 'miniogw-dev2.cnbita.com:11443'
        isOptional: true
        parameterType: STRING
      minio_secret_key:
        defaultValue: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYLEINAOYUNKEY'
        isOptional: true
        parameterType: STRING
      object_name:
        defaultValue: catdog/cat-dog_data.zip
        isOptional: true
        parameterType: STRING
      secure:
        defaultValue: true
        isOptional: true
        parameterType: BOOLEAN
  outputDefinitions:
    artifacts:
      evaluate-model-metrics:
        artifactType:
          schemaTitle: system.Metrics
          schemaVersion: 0.0.1
# Version of the pipeline spec schema this document declares.
schemaVersion: 2.1.0
# KFP SDK version recorded in this spec; the executors pin the same release (kfp==2.7.0).
sdkVersion: kfp-2.7.0