# PIPELINE DEFINITION
# Name: cats-vs-dogs-classification-from-minio
# Description: A pipeline to train and evaluate a cats-vs-dogs classifier using dataset from MinIO.
# Inputs:
#    batch_size: int [Default: 32.0]
#    bucket_name: str [Default: 'xrwang']
#    epochs: int [Default: 10.0]
#    local_zip_path: str [Default: '/tmp/cat-dog_data.zip']
#    minio_access_key: str [Default: 'LEINAOYUNOS']
#    minio_endpoint: str [Default: 'miniogw-dev2.cnbita.com:11443']
#    minio_secret_key: str [Default: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYLEINAOYUNKEY']
#    object_name: str [Default: 'catdog/cat-dog_data.zip']
#    secure: bool [Default: True]
# Outputs:
#    evaluate-model-metrics: system.Metrics
components:
  # Input and output type definitions for each component. Every entry names
  # the executor (under deploymentSpec.executors) that implements it.

  # Download step: seven parameters in (only `secure` has a default),
  # one Dataset artifact out.
  comp-download-and-prepare-data-from-minio:
    executorLabel: exec-download-and-prepare-data-from-minio
    inputDefinitions:
      parameters:
        bucket_name:
          parameterType: STRING
        local_zip_path:
          parameterType: STRING
        minio_access_key:
          parameterType: STRING
        minio_endpoint:
          parameterType: STRING
        minio_secret_key:
          parameterType: STRING
        object_name:
          parameterType: STRING
        secure:
          defaultValue: true
          isOptional: true
          parameterType: BOOLEAN
    outputDefinitions:
      artifacts:
        dataset_output:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: '0.0.1'

  # Evaluation step: Dataset and Model artifacts in, Metrics artifact out.
  comp-evaluate-model:
    executorLabel: exec-evaluate-model
    inputDefinitions:
      artifacts:
        dataset_input:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: '0.0.1'
        model_dir:
          artifactType:
            schemaTitle: system.Model
            schemaVersion: '0.0.1'
    outputDefinitions:
      artifacts:
        metrics:
          artifactType:
            schemaTitle: system.Metrics
            schemaVersion: '0.0.1'

  # Training step: Dataset artifact plus two integer parameters in,
  # Model artifact out.
  comp-train-model:
    executorLabel: exec-train-model
    inputDefinitions:
      artifacts:
        dataset_input:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: '0.0.1'
      parameters:
        batch_size:
          parameterType: NUMBER_INTEGER
        epochs:
          parameterType: NUMBER_INTEGER
    outputDefinitions:
      artifacts:
        model_output:
          artifactType:
            schemaTitle: system.Model
            schemaVersion: '0.0.1'
# Default pipeline root URI; it points into the `xrwang` bucket via the
# minio:// scheme.
defaultPipelineRoot: 'minio://xrwang/artifacts/'
# Executor definitions. Each executor runs the same three-stage command:
#   1. `sh -c`  : make sure pip exists, install the pinned kfp SDK and the
#                 packages the component needs, then exec the remaining
#                 arguments ("$0" "$@").
#   2. `sh -ec` : write the embedded Python source ("$0") to a temporary
#                 ephemeral_component.py and run it via kfp.dsl.executor_main.
#   3. the Python source of the component function itself.
# The scripts are literal block scalars so the shell and Python stay readable
# and their indentation is explicit.
deploymentSpec:
  executors:
    exec-download-and-prepare-data-from-minio:
      container:
        args:
        - --executor_input
        - "{{$}}"
        - --function_to_execute
        - download_and_prepare_data_from_minio
        command:
        - sh
        - -c
        # Only `minio` is installed on top of kfp: everything else the
        # function imports (zipfile, os) is part of the standard library.
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && python3 -m pip install --quiet --no-warn-script-location 'minio' && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def download_and_prepare_data_from_minio(
              dataset_output: Output[Dataset],
              minio_endpoint: str,
              minio_access_key: str,
              minio_secret_key: str,
              bucket_name: str,
              object_name: str,
              local_zip_path: str,
              secure: bool = True
          ):
              """Download a zip object from MinIO and unpack it into the dataset artifact path."""
              from minio import Minio
              from zipfile import ZipFile
              import os

              client = Minio(
                  minio_endpoint,
                  access_key=minio_access_key,
                  secret_key=minio_secret_key,
                  secure=secure
              )
              print("download_and_prepare_data_from_minio")
              client.fget_object(bucket_name, object_name, local_zip_path)
              with ZipFile(local_zip_path, 'r') as zip_ref:
                  zip_ref.extractall(path=dataset_output.path)
              # The archive is only a transport format; drop it once unpacked.
              os.remove(local_zip_path)
              print(f"Data prepared at: {dataset_output.path}")
        image: python:3.7
    exec-evaluate-model:
      container:
        args:
        - --executor_input
        - "{{$}}"
        - --function_to_execute
        - evaluate_model
        command:
        - sh
        - -c
        # The function imports only tensorflow and os, so tensorflow is the
        # single extra package requested.
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && python3 -m pip install --quiet --no-warn-script-location 'tensorflow' && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def evaluate_model(
              model_dir: Input[Model],
              dataset_input: Input[Dataset],
              metrics: Output[Metrics]
          ):
              """Evaluate the saved model on <dataset>/validation and log loss and accuracy."""
              import tensorflow as tf
              import os

              # Load the model
              model = tf.keras.models.load_model(model_dir.path)

              # Load the validation dataset
              validation_dir = os.path.join(dataset_input.path, 'validation')
              validation_dataset = tf.keras.utils.image_dataset_from_directory(
                  validation_dir,
                  image_size=(180, 180),
                  batch_size=32)

              # Evaluate the model
              eval_result = model.evaluate(validation_dataset)

              # Log the evaluation results
              metrics.log_metric("loss", eval_result[0])
              metrics.log_metric("accuracy", eval_result[1])
        image: registry.cnbita.com:5000/kubeflow-pipelines/python-tensorflow:3.7
    exec-train-model:
      container:
        args:
        - --executor_input
        - "{{$}}"
        - --function_to_execute
        - train_model
        command:
        - sh
        - -c
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && python3 -m pip install --quiet --no-warn-script-location 'tensorflow' 'numpy' && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def train_model(
              dataset_input: Input[Dataset],
              model_output: Output[Model],
              epochs: int,
              batch_size: int
          ):
              """Train a small CNN on <dataset>/train, validate on <dataset>/validation, save the model."""
              import tensorflow as tf
              import os
              train_dir = os.path.join(dataset_input.path, 'train')
              validation_dir = os.path.join(dataset_input.path, 'validation')

              # batch_size is applied here, when the datasets are built.
              train_dataset = tf.keras.utils.image_dataset_from_directory(
                  train_dir,
                  image_size=(180, 180),
                  batch_size=batch_size)

              validation_dataset = tf.keras.utils.image_dataset_from_directory(
                  validation_dir,
                  image_size=(180, 180),
                  batch_size=batch_size)

              model = tf.keras.Sequential([
                  tf.keras.layers.Rescaling(1./255),
                  tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
                  tf.keras.layers.MaxPooling2D(),
                  tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
                  tf.keras.layers.MaxPooling2D(),
                  tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
                  tf.keras.layers.MaxPooling2D(),
                  tf.keras.layers.Flatten(),
                  tf.keras.layers.Dense(128, activation='relu'),
                  tf.keras.layers.Dense(2)
              ])

              model.compile(optimizer='adam',
                            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                            metrics=['accuracy'])

              # The datasets are already batched, so batch_size is not passed to
              # fit(): Keras documents that it must not be given for dataset inputs.
              model.fit(train_dataset, validation_data=validation_dataset, epochs=epochs)
              model.save(model_output.path)
        image: registry.cnbita.com:5000/kubeflow-pipelines/python-tensorflow:3.7
# Pipeline identity: display description and unique name.
pipelineInfo:
  # Folded scalar: the two lines below join into one sentence with a single
  # space and no trailing newline.
  description: >-
    A pipeline to train and evaluate a cats-vs-dogs classifier using dataset
    from MinIO.
  name: cats-vs-dogs-classification-from-minio
# Root DAG: wires the three tasks together and declares the pipeline-level
# inputs and outputs.
root:
  dag:
    # The pipeline's single output is the `metrics` artifact of evaluate-model.
    outputs:
      artifacts:
        evaluate-model-metrics:
          artifactSelectors:
          - outputArtifactKey: metrics
            producerSubtask: evaluate-model
    # For each task: (1) where its inputs come from, (2) which tasks it
    # depends on, (3) its name (unique identifier).
    tasks:
      download-and-prepare-data-from-minio:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-download-and-prepare-data-from-minio
        # Every parameter is forwarded from the pipeline input of the same name.
        inputs:
          parameters:
            bucket_name:
              componentInputParameter: bucket_name
            local_zip_path:
              componentInputParameter: local_zip_path
            minio_access_key:
              componentInputParameter: minio_access_key
            minio_endpoint:
              componentInputParameter: minio_endpoint
            minio_secret_key:
              componentInputParameter: minio_secret_key
            object_name:
              componentInputParameter: object_name
            secure:
              componentInputParameter: secure
        taskInfo:
          name: download-and-prepare-data-from-minio
      evaluate-model:
        cachingOptions: {}
        # Component definition this task (evaluate-model) is built from.
        componentRef:
          name: comp-evaluate-model
        # Tasks that must finish before this one runs.
        dependentTasks:
        - download-and-prepare-data-from-minio
        - train-model
        # Which task produces each input of this component.
        inputs:
          artifacts:
            dataset_input:
              taskOutputArtifact:
                outputArtifactKey: dataset_output
                producerTask: download-and-prepare-data-from-minio
            model_dir:
              taskOutputArtifact:
                outputArtifactKey: model_output
                producerTask: train-model
        taskInfo:
          name: evaluate-model
      train-model:
        cachingOptions: {}
        componentRef:
          name: comp-train-model
        dependentTasks:
        - download-and-prepare-data-from-minio
        inputs:
          artifacts:
            dataset_input:
              taskOutputArtifact:
                outputArtifactKey: dataset_output
                producerTask: download-and-prepare-data-from-minio
          parameters:
            batch_size:
              componentInputParameter: batch_size
            epochs:
              componentInputParameter: epochs
        taskInfo:
          name: train-model
  # Pipeline-level inputs; all are optional and fall back to the defaults below.
  # NOTE(review): minio_access_key and minio_secret_key carry literal
  # credential defaults. They are kept so runs that rely on the defaults keep
  # working; rotate these keys and supply them per run (or from a secret
  # store) instead of committing them.
  inputDefinitions:
    parameters:
      batch_size:
        defaultValue: 32.0
        isOptional: true
        parameterType: NUMBER_INTEGER
      bucket_name:
        defaultValue: 'xrwang'
        isOptional: true
        parameterType: STRING
      epochs:
        defaultValue: 10.0
        isOptional: true
        parameterType: NUMBER_INTEGER
      local_zip_path:
        defaultValue: '/tmp/cat-dog_data.zip'
        isOptional: true
        parameterType: STRING
      minio_access_key:
        defaultValue: 'LEINAOYUNOS'
        isOptional: true
        parameterType: STRING
      minio_endpoint:
        defaultValue: 'miniogw-dev2.cnbita.com:11443'
        isOptional: true
        parameterType: STRING
      minio_secret_key:
        defaultValue: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYLEINAOYUNKEY'
        isOptional: true
        parameterType: STRING
      object_name:
        defaultValue: 'catdog/cat-dog_data.zip'
        isOptional: true
        parameterType: STRING
      secure:
        defaultValue: true
        isOptional: true
        parameterType: BOOLEAN
  outputDefinitions:
    artifacts:
      evaluate-model-metrics:
        artifactType:
          schemaTitle: system.Metrics
          schemaVersion: '0.0.1'
# Spec schema version and the SDK release recorded for this file; the SDK
# version matches the `kfp==2.7.0` pin in the executor install commands.
schemaVersion: '2.1.0'
sdkVersion: kfp-2.7.0