Skip to content
This repository has been archived by the owner on Sep 19, 2022. It is now read-only.

Commit

Permalink
Adding v1beta2 API implementation (#138)
Browse files Browse the repository at this point in the history
* Adding v1beta2 API implementation

* Build v1beta2
  • Loading branch information
johnugeorge authored and k8s-ci-robot committed Feb 14, 2019
1 parent 8c9dc74 commit da7798e
Show file tree
Hide file tree
Showing 71 changed files with 16,309 additions and 27 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ install:

script:
- go build -o pytorch-operator.v1beta1 github.com/kubeflow/pytorch-operator/cmd/pytorch-operator.v1beta1
- go build -o pytorch-operator.v1beta2 github.com/kubeflow/pytorch-operator/cmd/pytorch-operator.v1beta2
- gometalinter --config=linter_config.json ./pkg/...
# We customize the build step because by default
# Travis runs go test -v ./... which will include the vendor
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
FROM debian:jessie

# Ship one operator binary per supported API version.
COPY pytorch-operator.v1beta1 /pytorch-operator.v1beta1
COPY pytorch-operator.v1beta2 /pytorch-operator.v1beta2

# NOTE(review): only the versioned binaries above are copied into the image, yet the
# entrypoint names /pytorch-operator — confirm that deployments override the command.
ENTRYPOINT ["/pytorch-operator", "-alsologtostderr"]
5 changes: 3 additions & 2 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions build_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ ln -s ${CONTEXT_DIR} ${GO_DIR}
cd ${GO_DIR}
echo "Build pytorch operator v1beta1 binary"
go build github.com/kubeflow/pytorch-operator/cmd/pytorch-operator.v1beta1
echo "Build pytorch operator v1beta2 binary"
go build github.com/kubeflow/pytorch-operator/cmd/pytorch-operator.v1beta2

echo "Building container in gcloud"
gcloud builds submit . --tag=${IMAGE}:${TAG}
Expand Down
58 changes: 58 additions & 0 deletions cmd/pytorch-operator.v1beta2/app/options/options.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright 2018 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package options

import (
"flag"

"k8s.io/api/core/v1"
)

// ServerOption is the main context object for the controller manager.
// Apart from Kubeconfig, every field is bound to a command-line flag by AddFlags.
type ServerOption struct {
// Kubeconfig is the path of the kubeconfig file.
// NOTE(review): AddFlags registers no flag for this field — confirm that it is
// only meant to be filled in by the caller (e.g. from the environment).
Kubeconfig string
// MasterURL is the URL of the Kubernetes API server (flag "master", default empty).
MasterURL string
// Threadiness is how many threads process the main logic (flag "threadiness", default 1).
Threadiness int
// PrintVersion requests that the version be shown and the program quit (flag "version").
PrintVersion bool
// JSONLogFormat selects json style logs when true and plaintext style logs
// when false (flag "json-log-format", default true).
JSONLogFormat bool
// EnableGangScheduling enables gang scheduling by kube-batch
// (flag "enable-gang-scheduling", default false).
EnableGangScheduling bool
// Namespace is the namespace in which pytorch jobs are monitored
// (flag "namespace", default v1.NamespaceAll).
Namespace string
}

// NewServerOption returns a pointer to a zero-valued ServerOption. Register
// its flags with AddFlags and parse the flag set to populate the fields.
func NewServerOption() *ServerOption {
	return new(ServerOption)
}

// AddFlags registers the operator's command-line flags on fs and binds each
// of them to the matching field of s, so that parsing fs fills in the option
// values (or their defaults).
func (s *ServerOption) AddFlags(fs *flag.FlagSet) {
	// The usage texts are named up front so the registrations below stay short.
	const (
		masterUsage = `The url of the Kubernetes API server,
will overrides any value in kubeconfig, only required if out-of-cluster.`
		namespaceUsage = `The namespace to monitor pytorch jobs. If unset, it monitors all namespaces cluster-wide.
If set, it only monitors pytorch jobs in the given namespace.`
		threadinessUsage    = `How many threads to process the main logic`
		versionUsage        = "Show version and quit"
		jsonLogUsage        = "Set true to use json style log format. Set false to use plaintext style log format"
		gangSchedulingUsage = "Set true to enable gang scheduling by kube-batch."
	)

	// Cluster connection and scope.
	fs.StringVar(&s.MasterURL, "master", "", masterUsage)
	fs.StringVar(&s.Namespace, "namespace", v1.NamespaceAll, namespaceUsage)

	// Controller concurrency.
	fs.IntVar(&s.Threadiness, "threadiness", 1, threadinessUsage)

	// Boolean switches.
	fs.BoolVar(&s.PrintVersion, "version", false, versionUsage)
	fs.BoolVar(&s.JSONLogFormat, "json-log-format", true, jsonLogUsage)
	fs.BoolVar(&s.EnableGangScheduling, "enable-gang-scheduling", false, gangSchedulingUsage)
}
192 changes: 192 additions & 0 deletions cmd/pytorch-operator.v1beta2/app/server.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
// Copyright 2018 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package app

import (
"fmt"
"os"
"time"

log "github.com/sirupsen/logrus"
"k8s.io/api/core/v1"
crdclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeinformers "k8s.io/client-go/informers"
kubeclientset "k8s.io/client-go/kubernetes"
restclientset "k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
election "k8s.io/client-go/tools/leaderelection"
"k8s.io/client-go/tools/leaderelection/resourcelock"
"k8s.io/client-go/tools/record"

"github.com/kubeflow/pytorch-operator/cmd/pytorch-operator.v1beta2/app/options"
"github.com/kubeflow/pytorch-operator/pkg/apis/pytorch/v1beta2"
jobclientset "github.com/kubeflow/pytorch-operator/pkg/client/clientset/versioned"
"github.com/kubeflow/pytorch-operator/pkg/client/clientset/versioned/scheme"
jobinformers "github.com/kubeflow/pytorch-operator/pkg/client/informers/externalversions"
controller "github.com/kubeflow/pytorch-operator/pkg/controller.v1beta2/pytorch"
"github.com/kubeflow/tf-operator/pkg/util/signals"
"github.com/kubeflow/tf-operator/pkg/version"
)

const (
	// apiVersion is the API version label that Run passes to the version helpers.
	apiVersion = "v1beta2"

	// RecommendedKubeConfigPathEnv is the name of the environment variable
	// from which Run takes the kubeconfig path when it is set.
	RecommendedKubeConfigPathEnv = "KUBECONFIG"
)

var (
	// Timings handed to the leader election config.
	leaseDuration = 15 * time.Second
	renewDuration = 5 * time.Second
	retryPeriod   = 3 * time.Second

	// resyncPeriod is the resync interval given to the informer factories.
	resyncPeriod = 30 * time.Second
)

// Run wires up and starts the v1beta2 PyTorch operator described by opt.
//
// It prints the version when opt.PrintVersion is set, picks the namespace of
// the leader-election lock from the environment (falling back to the default
// namespace), builds the Kubernetes client config, creates the client sets,
// informers and controller, and finally hands control to leader election,
// whose OnStartedLeading callback runs the controller.
//
// opt.Kubeconfig is overwritten when the KUBECONFIG environment variable is
// set.
//
// An error is returned when the client config or the client sets cannot be
// built, the hostname cannot be determined, or the core v1 types cannot be
// added to the scheme. nil is returned if election.RunOrDie returns.
func Run(opt *options.ServerOption) error {
	// Check if the -version flag was passed and, if so, print the version and exit.
	if opt.PrintVersion {
		version.PrintVersionAndExit(apiVersion)
	}

	// Namespace that holds the leader-election lock object.
	namespace := os.Getenv(v1beta2.EnvKubeflowNamespace)
	if len(namespace) == 0 {
		log.Infof("EnvKubeflowNamespace not set, use default namespace")
		namespace = metav1.NamespaceDefault
	}

	// To help debugging, immediately log version.
	log.Infof("%+v", version.Info(apiVersion))

	// Set up signals so we handle the first shutdown signal gracefully.
	stopCh := signals.SetupSignalHandler()

	// Note: ENV KUBECONFIG will overwrite user defined Kubeconfig option.
	// Using the current context in kubeconfig is very useful for running locally.
	if kubeconfigEnv := os.Getenv(RecommendedKubeConfigPathEnv); len(kubeconfigEnv) > 0 {
		opt.Kubeconfig = kubeconfigEnv
	}

	// Get kubernetes config. A failure is returned like every other setup
	// error in this function, so the caller decides how to terminate.
	kcfg, err := clientcmd.BuildConfigFromFlags(opt.MasterURL, opt.Kubeconfig)
	if err != nil {
		return fmt.Errorf("error building kubeconfig: %v", err)
	}

	// Create clients.
	kubeClientSet, leaderElectionClientSet, pytorchJobClientSet, err := createClientSets(kcfg)
	if err != nil {
		return err
	}

	// Create informer factory.
	kubeInformerFactory := kubeinformers.NewFilteredSharedInformerFactory(kubeClientSet, resyncPeriod, opt.Namespace, nil)
	pytorchJobInformerFactory := jobinformers.NewSharedInformerFactory(pytorchJobClientSet, resyncPeriod)

	unstructuredInformer := controller.NewUnstructuredPyTorchJobInformer(kcfg, opt.Namespace)

	// Create pytorch controller.
	tc := controller.NewPyTorchController(unstructuredInformer, kubeClientSet, pytorchJobClientSet, kubeInformerFactory, pytorchJobInformerFactory, *opt)

	// Start informer goroutines.
	// NOTE(review): pytorchJobInformerFactory is not started here; presumably the
	// controller relies on the unstructured informer instead — confirm.
	go kubeInformerFactory.Start(stopCh)

	go unstructuredInformer.Informer().Run(stopCh)

	// Set leader election start function. The channel supplied by the
	// election is ignored; the controller is stopped through stopCh.
	run := func(<-chan struct{}) {
		if err := tc.Run(opt.Threadiness, stopCh); err != nil {
			log.Errorf("Failed to run the controller: %v", err)
		}
	}

	// The hostname serves as this instance's identity in the election.
	id, err := os.Hostname()
	if err != nil {
		return fmt.Errorf("failed to get hostname: %v", err)
	}

	// Prepare event clients.
	// NOTE(review): no event sink is attached to eventBroadcaster in this
	// function — confirm whether events from the recorder are expected to be delivered.
	eventBroadcaster := record.NewBroadcaster()
	if err = v1.AddToScheme(scheme.Scheme); err != nil {
		return fmt.Errorf("coreV1 Add Scheme failed: %v", err)
	}
	recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "pytorch-operator"})

	rl := &resourcelock.EndpointsLock{
		EndpointsMeta: metav1.ObjectMeta{
			Namespace: namespace,
			Name:      "pytorch-operator",
		},
		Client: leaderElectionClientSet.CoreV1(),
		LockConfig: resourcelock.ResourceLockConfig{
			Identity:      id,
			EventRecorder: recorder,
		},
	}

	// Start leader election.
	election.RunOrDie(election.LeaderElectionConfig{
		Lock:          rl,
		LeaseDuration: leaseDuration,
		RenewDeadline: renewDuration,
		RetryPeriod:   retryPeriod,
		Callbacks: election.LeaderCallbacks{
			OnStartedLeading: run,
			OnStoppedLeading: func() {
				log.Fatalf("leader election lost")
			},
		},
	})

	return nil
}

// createClientSets builds, in order, the clients the operator needs from
// config: a CRD client (used only to check that the PyTorch CRD is present),
// the operator's Kubernetes client, a separate Kubernetes client for leader
// election, and the PyTorch job client. The three latter clients are
// returned; on the first construction failure all clients are nil and the
// error is returned.
func createClientSets(config *restclientset.Config) (kubeclientset.Interface, kubeclientset.Interface, jobclientset.Interface, error) {
	apiExtClient, err := crdclient.NewForConfig(config)
	if err != nil {
		return nil, nil, nil, err
	}

	// checkCRDExists terminates the process when the CRD lookup fails.
	checkCRDExists(apiExtClient, v1beta2.PytorchCRD)

	operatorConfig := restclientset.AddUserAgent(config, "pytorch-operator")
	operatorClient, err := kubeclientset.NewForConfig(operatorConfig)
	if err != nil {
		return nil, nil, nil, err
	}

	electionConfig := restclientset.AddUserAgent(config, "leader-election")
	electionClient, err := kubeclientset.NewForConfig(electionConfig)
	if err != nil {
		return nil, nil, nil, err
	}

	// NOTE(review): AddUserAgent appears to set the user agent on config itself,
	// so this client presumably inherits the "leader-election" agent — confirm.
	jobClient, err := jobclientset.NewForConfig(config)
	if err != nil {
		return nil, nil, nil, err
	}

	return operatorClient, electionClient, jobClient, nil
}

// checkCRDExists looks up the CustomResourceDefinition named crdName through
// clientset. If the lookup returns an error, the error is logged and the
// process exits with status 1; otherwise the function simply returns.
func checkCRDExists(clientset crdclient.Interface, crdName string) {
	definitions := clientset.ApiextensionsV1beta1().CustomResourceDefinitions()
	if _, err := definitions.Get(crdName, metav1.GetOptions{}); err != nil {
		log.Error(err)
		os.Exit(1)
	}
}
49 changes: 49 additions & 0 deletions cmd/pytorch-operator.v1beta2/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright 2018 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"flag"

"github.com/onrik/logrus/filename"
log "github.com/sirupsen/logrus"

"github.com/kubeflow/pytorch-operator/cmd/pytorch-operator.v1beta2/app"
"github.com/kubeflow/pytorch-operator/cmd/pytorch-operator.v1beta2/app/options"
)

// init registers a logrus hook that adds the filename as one of the fields
// of every structured log message, under the key "filename".
func init() {
	hook := filename.NewHook()
	hook.Field = "filename"
	log.AddHook(hook)
}

// main parses the command-line flags into a ServerOption, selects the log
// format, and runs the operator. A non-nil error from app.Run is logged as
// fatal.
func main() {
	opts := options.NewServerOption()
	opts.AddFlags(flag.CommandLine)
	flag.Parse()

	// JSON output lets services like Stackdriver parse the logs.
	if opts.JSONLogFormat {
		log.SetFormatter(&log.JSONFormatter{})
	}

	err := app.Run(opts)
	if err != nil {
		log.Fatalf("%v\n", err)
	}
}
8 changes: 7 additions & 1 deletion hack/update-codegen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,17 @@ CODEGEN_PKG=${CODEGEN_PKG:-$(cd ${SCRIPT_ROOT}; ls -d -1 ./vendor/k8s.io/code-ge
# instead of the $GOPATH directly. For normal projects this can be dropped.
${CODEGEN_PKG}/generate-groups.sh "defaulter,deepcopy,client,informer,lister" \
github.com/kubeflow/pytorch-operator/pkg/client github.com/kubeflow/pytorch-operator/pkg/apis \
pytorch:v1beta1 \
pytorch:v1beta1,v1beta2 \
--go-header-file ${SCRIPT_ROOT}/hack/boilerplate/boilerplate.go.txt

echo "Generating defaulters for pytorch v1beta1"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/pytorch-operator/pkg/apis/pytorch/v1beta1 \
-O zz_generated.defaults \
--go-header-file ./hack/../hack/boilerplate/boilerplate.go.txt \
--output-package github.com/kubeflow/pytorch-operator/pkg/apis/pytorch/v1beta1

echo "Generating defaulters for pytorch v1beta2"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/pytorch-operator/pkg/apis/pytorch/v1beta2 \
-O zz_generated.defaults \
--go-header-file ./hack/../hack/boilerplate/boilerplate.go.txt \
--output-package github.com/kubeflow/pytorch-operator/pkg/apis/pytorch/v1beta2
Loading

0 comments on commit da7798e

Please sign in to comment.