Skip to content

Remove local_interactive parameter and related functions #512

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/e2e_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ jobs:
make setup-e2e
echo Deploying CodeFlare operator
IMG="${REGISTRY_ADDRESS}"/codeflare-operator
sed -i 's/RayDashboardOAuthEnabled: pointer.Bool(true)/RayDashboardOAuthEnabled: pointer.Bool(false)/' main.go
make image-push -e IMG="${IMG}"
make deploy -e IMG="${IMG}" -e ENV="e2e"
kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
Expand Down
17 changes: 2 additions & 15 deletions src/codeflare_sdk/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ def create_app_wrapper(self):
mcad = self.config.mcad
instance_types = self.config.machine_types
env = self.config.envs
local_interactive = self.config.local_interactive
image_pull_secrets = self.config.image_pull_secrets
dispatch_priority = self.config.dispatch_priority
write_to_file = self.config.write_to_file
Expand All @@ -204,7 +203,6 @@ def create_app_wrapper(self):
mcad=mcad,
instance_types=instance_types,
env=env,
local_interactive=local_interactive,
image_pull_secrets=image_pull_secrets,
dispatch_priority=dispatch_priority,
priority_val=priority_val,
Expand Down Expand Up @@ -480,13 +478,6 @@ def from_k8_cluster_object(
verify_tls=True,
):
config_check()
if (
rc["metadata"]["annotations"]["sdk.codeflare.dev/local_interactive"]
== "True"
):
local_interactive = True
else:
local_interactive = False
machine_types = (
rc["metadata"]["labels"]["orderedinstance"].split("_")
if "orderedinstance" in rc["metadata"]["labels"]
Expand Down Expand Up @@ -527,19 +518,15 @@ def from_k8_cluster_object(
image=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][
0
]["image"],
local_interactive=local_interactive,
mcad=mcad,
write_to_file=write_to_file,
verify_tls=verify_tls,
)
return Cluster(cluster_config)

def local_client_url(self):
if self.config.local_interactive == True:
ingress_domain = _get_ingress_domain(self)
return f"ray://{ingress_domain}"
else:
return "None"
ingress_domain = _get_ingress_domain(self)
return f"ray://{ingress_domain}"

def _component_resources_up(
self, namespace: str, api_instance: client.CustomObjectsApi
Expand Down
1 change: 0 additions & 1 deletion src/codeflare_sdk/cluster/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ class ClusterConfiguration:
mcad: bool = False
envs: dict = field(default_factory=dict)
image: str = ""
local_interactive: bool = False
image_pull_secrets: list = field(default_factory=list)
dispatch_priority: str = None
write_to_file: bool = False
Expand Down
73 changes: 0 additions & 73 deletions src/codeflare_sdk/templates/base-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ spec:
apiVersion: ray.io/v1
kind: RayCluster
metadata:
annotations:
sdk.codeflare.dev/local_interactive: "False"
labels:
workload.codeflare.dev/appwrapper: "aw-kuberay"
controller-tools.k8s.io: "1.0"
Expand Down Expand Up @@ -153,12 +151,6 @@ spec:
memory: "8G"
nvidia.com/gpu: 0
volumeMounts:
- name: ca-vol
mountPath: "/home/ray/workspace/ca"
readOnly: true
- name: server-cert
mountPath: "/home/ray/workspace/tls"
readOnly: true
- mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
Expand All @@ -171,30 +163,7 @@ spec:
- mountPath: /etc/ssl/certs/odh-ca-bundle.crt
name: odh-ca-cert
subPath: odh-ca-bundle.crt
initContainers:
- command:
- sh
- -c
- cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = rayclient-deployment-name-$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).server-name">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext
image: quay.io/project-codeflare/ray:latest-py39-cu118
name: create-cert
# securityContext:
# runAsUser: 1000
# runAsGroup: 1000
volumeMounts:
- name: ca-vol
mountPath: "/home/ray/workspace/ca"
readOnly: true
- name: server-cert
mountPath: "/home/ray/workspace/tls"
readOnly: false
volumes:
- name: ca-vol
secret:
secretName: ca-secret-deployment-name
optional: false
- name: server-cert
emptyDir: {}
- name: odh-trusted-ca-cert
configMap:
name: odh-trusted-ca-bundle
Expand Down Expand Up @@ -249,24 +218,6 @@ spec:
operator: In
values:
- "aw-kuberay"
initContainers:
# the env var $RAY_IP is set by the operator if missing, with the value of the head service name
- name: create-cert
image: quay.io/project-codeflare/ray:latest-py39-cu118
command:
- sh
- -c
- cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext
# securityContext:
# runAsUser: 1000
# runAsGroup: 1000
volumeMounts:
- name: ca-vol
mountPath: "/home/ray/workspace/ca"
readOnly: true
- name: server-cert
mountPath: "/home/ray/workspace/tls"
readOnly: false
containers:
- name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc')
image: quay.io/project-codeflare/ray:latest-py39-cu118
Expand Down Expand Up @@ -299,12 +250,6 @@ spec:
memory: "12G"
nvidia.com/gpu: "1"
volumeMounts:
- name: ca-vol
mountPath: "/home/ray/workspace/ca"
readOnly: true
- name: server-cert
mountPath: "/home/ray/workspace/tls"
readOnly: true
- mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
Expand All @@ -318,12 +263,6 @@ spec:
name: odh-ca-cert
subPath: odh-ca-bundle.crt
volumes:
- name: ca-vol
secret:
secretName: ca-secret-deployment-name
optional: false
- name: server-cert
emptyDir: {}
- name: odh-trusted-ca-cert
configMap:
name: odh-trusted-ca-bundle
Expand All @@ -338,15 +277,3 @@ spec:
- key: odh-ca-bundle.crt
path: odh-ca-bundle.crt
optional: true
- replicas: 1
generictemplate:
apiVersion: v1
data:
ca.crt: generated_crt
ca.key: generated_key
kind: Secret
metadata:
name: ca-secret-deployment-name
labels:
# allows me to return name of service that Ray operator creates
odh-ray-cluster-service: deployment-name-head-svc
104 changes: 0 additions & 104 deletions src/codeflare_sdk/utils/generate_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,6 @@ def is_openshift_cluster():
return _kube_api_error_handling(e)


def is_kind_cluster():
    """Report whether the current cluster looks like a KinD cluster.

    The check lists nodes carrying the label
    ``kubernetes.io/hostname=kind-control-plane`` and treats one or more
    matches as a KinD cluster.

    Returns:
        bool: ``True`` when at least one matching node is found. ``False``
        when none is found, or when any exception is raised while checking
        (the error is printed rather than propagated).
    """
    try:
        config_check()
        core_api = client.CoreV1Api()
        matching_nodes = core_api.list_node(
            label_selector="kubernetes.io/hostname=kind-control-plane"
        )
        # One or more labelled nodes is taken to mean this is a KinD cluster.
        looks_like_kind = len(matching_nodes.items) > 0
    except Exception as e:
        # Best-effort probe: report the failure and answer "not KinD".
        print(f"Error checking if cluster is KinD: {e}")
        looks_like_kind = False
    return looks_like_kind


def update_names(yaml, item, appwrapper_name, cluster_name, namespace):
metadata = yaml.get("metadata")
metadata["name"] = appwrapper_name
Expand Down Expand Up @@ -291,95 +278,10 @@ def update_ca_secret(ca_secret_item, cluster_name, namespace):
data["ca.key"], data["ca.crt"] = generate_cert.generate_ca_cert(365)


def enable_local_interactive(resources, cluster_name, namespace):  # pragma: no cover
    """Configure the generated resources for local interactive use, in place.

    Args:
        resources: Mapping whose ``["resources"]["GenericItems"]`` list holds
            the RayCluster item at index 0 and the CA secret item at index 1.
        cluster_name: Cluster name; used to build the ``ca-secret-<name>``
            secret name and substituted for ``deployment-name`` in the head
            init container command.
        namespace: Passed through to ``update_ca_secret``.

    Returns:
        None. ``resources`` is mutated.
    """
    # NOTE(review): imported but not referenced below; kept unchanged.
    from ..cluster.cluster import _get_ingress_domain

    generic_items = resources["resources"].get("GenericItems")
    ca_secret_item = generic_items[1]
    ray_cluster_item = generic_items[0]
    update_ca_secret(ca_secret_item, cluster_name, namespace)

    ray_cluster = ray_cluster_item["generictemplate"]
    ca_secret_name = f"ca-secret-{cluster_name}"

    # Point the first volume of the head and of the first worker group at the
    # per-cluster CA secret.
    head_pod = ray_cluster["spec"]["headGroupSpec"]["template"]["spec"]
    head_pod["volumes"][0]["secret"]["secretName"] = ca_secret_name
    worker_pod = ray_cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"]
    worker_pod["volumes"][0]["secret"]["secretName"] = ca_secret_name

    # Set the second env entry of each first container to "1".
    # Presumably this is the TLS toggle -- verify against the template.
    for pod in (head_pod, worker_pod):
        pod["containers"][0]["env"][1]["value"] = "1"

    # Rewrite the placeholders in the head init container's shell command
    # (the third element of its command list).
    init_command = head_pod["initContainers"][0].get("command")
    domain = ""  ## FIX - We can't retrieve ingress domain - move init container to CFO
    patched_command = (
        init_command[2]
        .replace("deployment-name", cluster_name)
        .replace("server-name", domain)
    )

    ray_cluster["metadata"]["annotations"][
        "sdk.codeflare.dev/local_interactive"
    ] = "True"
    init_command[2] = patched_command


def del_from_list_by_name(l: list, target: typing.List[str]) -> list:
    """Return a new list without the entries whose ``"name"`` is in ``target``.

    Args:
        l: List of mappings, each of which must have a ``"name"`` key.
        target: Names to drop.

    Returns:
        A new list with the surviving entries in their original order; ``l``
        itself is not modified.

    Raises:
        KeyError: If an entry has no ``"name"`` key.
    """
    kept = []
    for entry in l:
        if entry["name"] in target:
            continue
        kept.append(entry)
    return kept


def _strip_tls_from_pod_spec(pod_spec):
    """Remove the TLS volumes, their mounts and any initContainers from one pod spec, in place."""
    tls_volume_names = ["ca-vol", "server-cert"]
    pod_spec["volumes"] = del_from_list_by_name(
        pod_spec.get("volumes", []), tls_volume_names
    )
    for container in pod_spec.get("containers", []):
        container["volumeMounts"] = del_from_list_by_name(
            container.get("volumeMounts", []), tls_volume_names
        )
    # pop() with a default so a spec that has no initContainers is not an error.
    pod_spec.pop("initContainers", None)


def disable_raycluster_tls(resources):
    """Strip the TLS certificate plumbing from the RayCluster template, in place.

    For the head group and for every worker group this removes the
    ``ca-vol`` and ``server-cert`` volumes, the matching volume mounts on
    each container, and the ``initContainers`` list. It then drops every
    generic item whose name contains ``rayclient-deployment-ingress``,
    ``rayclient-deployment-route`` or ``ca-secret-deployment-name``.

    All worker groups are handled alike. An absent ``workerGroupSpecs`` list,
    or a group without ``initContainers``, is tolerated instead of raising,
    and no worker group is left with init containers that mount the removed
    volumes.

    Args:
        resources: Mapping with a ``"GenericItems"`` list whose first item is
            the RayCluster ``generictemplate``.

    Returns:
        None. ``resources`` is mutated and ``resources["GenericItems"]`` is
        rebound to a new, filtered list.
    """
    generic_template_spec = resources["GenericItems"][0]["generictemplate"]["spec"]

    _strip_tls_from_pod_spec(generic_template_spec["headGroupSpec"]["template"]["spec"])
    for worker_group in generic_template_spec.get("workerGroupSpecs", []):
        _strip_tls_from_pod_spec(worker_group["template"]["spec"])

    # Items that only exist to support the TLS / local-interactive setup.
    dropped_name_fragments = (
        "rayclient-deployment-ingress",
        "rayclient-deployment-route",
        "ca-secret-deployment-name",
    )
    resources["GenericItems"] = [
        item
        for item in resources["GenericItems"]
        if not any(
            fragment in item["generictemplate"]["metadata"]["name"]
            for fragment in dropped_name_fragments
        )
    ]


def write_user_appwrapper(user_yaml, output_file_name):
# Create the directory if it doesn't exist
directory_path = os.path.dirname(output_file_name)
Expand Down Expand Up @@ -568,7 +470,6 @@ def generate_appwrapper(
mcad: bool,
instance_types: list,
env,
local_interactive: bool,
image_pull_secrets: list,
dispatch_priority: str,
priority_val: int,
Expand Down Expand Up @@ -619,11 +520,6 @@ def generate_appwrapper(
head_gpus,
)

if local_interactive:
enable_local_interactive(resources, cluster_name, namespace)
else:
disable_raycluster_tls(resources["resources"])

if is_openshift_cluster():
enable_openshift_oauth(user_yaml, cluster_name, namespace)

Expand Down
2 changes: 0 additions & 2 deletions tests/test-case-bad.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ spec:
apiVersion: ray.io/v1
kind: RayCluster
metadata:
annotations:
sdk.codeflare.dev/local_interactive: 'False'
labels:
workload.codeflare.dev/appwrapper: unit-test-cluster
controller-tools.k8s.io: '1.0'
Expand Down
2 changes: 0 additions & 2 deletions tests/test-case-no-mcad.yamls
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
apiVersion: ray.io/v1
kind: RayCluster
metadata:
annotations:
sdk.codeflare.dev/local_interactive: 'False'
labels:
controller-tools.k8s.io: '1.0'
kueue.x-k8s.io/queue-name: local-queue-default
Expand Down
2 changes: 0 additions & 2 deletions tests/test-case-prio.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ spec:
apiVersion: ray.io/v1
kind: RayCluster
metadata:
annotations:
sdk.codeflare.dev/local_interactive: 'False'
labels:
controller-tools.k8s.io: '1.0'
workload.codeflare.dev/appwrapper: prio-test-cluster
Expand Down
2 changes: 0 additions & 2 deletions tests/test-case.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ spec:
apiVersion: ray.io/v1
kind: RayCluster
metadata:
annotations:
sdk.codeflare.dev/local_interactive: 'False'
labels:
controller-tools.k8s.io: '1.0'
workload.codeflare.dev/appwrapper: unit-test-cluster
Expand Down
2 changes: 0 additions & 2 deletions tests/test-default-appwrapper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ spec:
apiVersion: ray.io/v1
kind: RayCluster
metadata:
annotations:
sdk.codeflare.dev/local_interactive: 'False'
labels:
controller-tools.k8s.io: '1.0'
workload.codeflare.dev/appwrapper: unit-test-default-cluster
Expand Down
Loading