-
Notifications
You must be signed in to change notification settings - Fork 4.8k
OCPBUGS-59176: fix several failing tests in custom-dns jobs #31129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,7 +2,6 @@ package router | |
|
|
||
| import ( | ||
| "context" | ||
| "crypto/tls" | ||
| "errors" | ||
| "fmt" | ||
| "net" | ||
|
|
@@ -367,10 +366,9 @@ var _ = g.Describe("[sig-network-edge][OCPFeatureGate:GatewayAPIController][Feat | |
| assertGatewayLoadbalancerReady(oc, gw, gw+"-openshift-default") | ||
| } | ||
|
|
||
| // check the dns record is created and status of the published dnsrecord of all zones are True | ||
| if managedDNS { | ||
| assertDNSRecordStatus(oc, gw) | ||
| } | ||
| // Check the DNSRecord status - when DNS zones are configured, expects Published=True; | ||
| // when DNS zones are not configured (custom-dns), expects Published!=True | ||
| assertDNSRecordStatus(oc, gw) | ||
| }) | ||
|
|
||
| g.It("Ensure HTTPRoute object is created", func() { | ||
|
|
@@ -391,10 +389,9 @@ var _ = g.Describe("[sig-network-edge][OCPFeatureGate:GatewayAPIController][Feat | |
| _, gwerr := createAndCheckGateway(oc, gw, gatewayClassName, customDomain, loadBalancerSupported) | ||
| o.Expect(gwerr).NotTo(o.HaveOccurred(), "Failed to create Gateway") | ||
|
|
||
| // make sure the DNSRecord is ready to use | ||
| if managedDNS { | ||
| assertDNSRecordStatus(oc, gw) | ||
| } | ||
| // Verify DNSRecord is ready - expects Published=True when zones configured, | ||
| // Published!=True when zones not configured (custom-dns) | ||
| assertDNSRecordStatus(oc, gw) | ||
|
|
||
| g.By("Create the http route using the custom gateway") | ||
| defaultRoutename := "test-hostname." + customDomain | ||
|
|
@@ -403,9 +400,9 @@ var _ = g.Describe("[sig-network-edge][OCPFeatureGate:GatewayAPIController][Feat | |
| g.By("Checking the http route using the default gateway is accepted") | ||
| assertHttpRouteSuccessful(oc, gw, "test-httproute") | ||
|
|
||
| g.By("Validating the http connectivity to the backend application") | ||
| if loadBalancerSupported && managedDNS { | ||
| assertHttpRouteConnection(defaultRoutename) | ||
| if loadBalancerSupported { | ||
| g.By("Validating the http connectivity to the backend application") | ||
| assertHttpRouteConnection(oc, gw+"-openshift-default", defaultRoutename) | ||
| } | ||
| }) | ||
|
|
||
|
|
@@ -612,18 +609,24 @@ func getPlatformCapabilities(oc *exutil.CLI) (loadBalancerSupported bool, manage | |
| loadBalancerSupported = false | ||
| } | ||
|
|
||
| managedDNS = isDNSManaged(oc) | ||
| managedDNS, err = isDNSZoneConfigured(oc) | ||
| o.Expect(err).NotTo(o.HaveOccurred(), "Failed to check if DNS zone is configured") | ||
|
|
||
| e2e.Logf("Platform: %s, LoadBalancer supported: %t, DNS managed: %t", infra.Status.PlatformStatus.Type, loadBalancerSupported, managedDNS) | ||
| return loadBalancerSupported, managedDNS | ||
| } | ||
|
|
||
| // isDNSManaged checks if the cluster has DNS zones configured (public or private). | ||
| // isDNSZoneConfigured checks if the cluster has DNS zones configured (public or private). | ||
| // On platforms like vSphere without external DNS, DNS records cannot be managed. | ||
| func isDNSManaged(oc *exutil.CLI) bool { | ||
| // This checks dns.config.openshift.io/cluster PublicZone/PrivateZone, and is named | ||
| // isDNSZoneConfigured (not isDNSManaged) to avoid confusion with the DNSManaged | ||
| // IngressController condition. | ||
| func isDNSZoneConfigured(oc *exutil.CLI) (bool, error) { | ||
| dnsConfig, err := oc.AdminConfigClient().ConfigV1().DNSes().Get(context.Background(), "cluster", metav1.GetOptions{}) | ||
| o.Expect(err).NotTo(o.HaveOccurred(), "Failed to get DNS config") | ||
| return dnsConfig.Spec.PrivateZone != nil || dnsConfig.Spec.PublicZone != nil | ||
| if err != nil { | ||
| return false, err | ||
| } | ||
| return dnsConfig.Spec.PrivateZone != nil || dnsConfig.Spec.PublicZone != nil, nil | ||
| } | ||
|
|
||
| // isIPv6OrDualStack checks if the cluster is using IPv6 or dual-stack networking. | ||
|
|
@@ -753,18 +756,17 @@ func buildGateway(name, namespace, gcname, fromNs, domain string) *gatewayapiv1. | |
| } | ||
| } | ||
|
|
||
| // assertGatewayLoadbalancerReady verifies that the given gateway has the service's load balancer address assigned. | ||
| func assertGatewayLoadbalancerReady(oc *exutil.CLI, gwName, gwServiceName string) { | ||
| // check gateway LB service, note that External-IP might be hostname (AWS) or IP (Azure/GCP) | ||
| // return LoadBalancer service address, note that External-IP might be hostname (AWS) or IP (Azure/GCP) | ||
| func getLoadBalancerAddress(oc *exutil.CLI, serviceName string) string { | ||
| var lbAddress string | ||
| err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, loadBalancerReadyTimeout, false, func(context context.Context) (bool, error) { | ||
| lbService, err := oc.AdminKubeClient().CoreV1().Services(ingressNamespace).Get(context, gwServiceName, metav1.GetOptions{}) | ||
| lbService, err := oc.AdminKubeClient().CoreV1().Services(ingressNamespace).Get(context, serviceName, metav1.GetOptions{}) | ||
| if err != nil { | ||
| e2e.Logf("Failed to get service %q: %v, retrying...", gwServiceName, err) | ||
| e2e.Logf("Failed to get service %q: %v, retrying...", serviceName, err) | ||
| return false, nil | ||
| } | ||
| if len(lbService.Status.LoadBalancer.Ingress) == 0 { | ||
| e2e.Logf("Service %q has no load balancer; retrying...", gwServiceName) | ||
| e2e.Logf("Service %q has no load balancer; retrying...", serviceName) | ||
| return false, nil | ||
| } | ||
| if lbService.Status.LoadBalancer.Ingress[0].Hostname != "" { | ||
|
|
@@ -773,11 +775,20 @@ func assertGatewayLoadbalancerReady(oc *exutil.CLI, gwName, gwServiceName string | |
| lbAddress = lbService.Status.LoadBalancer.Ingress[0].IP | ||
| } | ||
| if lbAddress == "" { | ||
| e2e.Logf("No load balancer address for service %q, retrying", gwServiceName) | ||
| e2e.Logf("No load balancer address for service %q, retrying", serviceName) | ||
| return false, nil | ||
| } | ||
| e2e.Logf("Got load balancer address for service %q: %v", gwServiceName, lbAddress) | ||
| return true, nil | ||
| }) | ||
| o.Expect(err).NotTo(o.HaveOccurred(), "Timed out to get load balancer address of service %q", serviceName) | ||
| e2e.Logf("Got load balancer address for service %q: %v", serviceName, lbAddress) | ||
| return lbAddress | ||
| } | ||
|
|
||
| // assertGatewayLoadbalancerReady verifies that the given gateway has the service's load balancer address assigned. | ||
| func assertGatewayLoadbalancerReady(oc *exutil.CLI, gwName, gwServiceName string) { | ||
| lbAddress := getLoadBalancerAddress(oc, gwServiceName) | ||
| err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, loadBalancerReadyTimeout, false, func(context context.Context) (bool, error) { | ||
| gw, err := oc.AdminGatewayApiClient().GatewayV1().Gateways(ingressNamespace).Get(context, gwName, metav1.GetOptions{}) | ||
| if err != nil { | ||
| e2e.Logf("Failed to get gateway %q: %v; retrying...", err, gwName) | ||
|
|
@@ -795,10 +806,17 @@ func assertGatewayLoadbalancerReady(oc *exutil.CLI, gwName, gwServiceName string | |
| o.Expect(err).NotTo(o.HaveOccurred(), "Timed out waiting for gateway %q to get load balancer address of service %q", gwName, gwServiceName) | ||
| } | ||
|
|
||
| // assertDNSRecordStatus polls until the DNSRecord's status in the default operand namespace is True. | ||
| // assertDNSRecordStatus polls until the DNSRecord's status in the default operand namespace is ready. | ||
| // When DNS is managed, it waits for all zones to have Published=True. | ||
| // When DNS is unmanaged, it waits for all zones to have Published!=True (expected in custom-dns clusters). | ||
| func assertDNSRecordStatus(oc *exutil.CLI, gatewayName string) { | ||
| // find the DNS Record and confirm its zone status is True | ||
| err := wait.PollUntilContextTimeout(context.Background(), 2*time.Second, 10*time.Minute, false, func(context context.Context) (bool, error) { | ||
| dnsManaged, err := isDNSZoneConfigured(oc) | ||
| if err != nil { | ||
| e2e.Failf("Failed to check if DNS zone is configured: %v", err) | ||
| } | ||
|
|
||
| // find the DNS Record and confirm its zone status | ||
| err = wait.PollUntilContextTimeout(context.Background(), 2*time.Second, 10*time.Minute, false, func(context context.Context) (bool, error) { | ||
| gatewayDNSRecord := &operatoringressv1.DNSRecord{} | ||
| gatewayDNSRecords, err := oc.AdminIngressClient().IngressV1().DNSRecords(ingressNamespace).List(context, metav1.ListOptions{}) | ||
| if err != nil { | ||
|
|
@@ -810,20 +828,43 @@ func assertDNSRecordStatus(oc *exutil.CLI, gatewayName string) { | |
| for _, record := range gatewayDNSRecords.Items { | ||
| if record.Labels["gateway.networking.k8s.io/gateway-name"] == gatewayName { | ||
| gatewayDNSRecord = &record | ||
| e2e.Logf("Found the desired dnsrecord and spec is: %v", gatewayDNSRecord.Spec) | ||
| break | ||
| } | ||
| } | ||
|
|
||
| // checking the gateway DNS record status | ||
| if len(gatewayDNSRecord.Status.Zones) == 0 { | ||
| e2e.Logf("DNS record %q has no zones yet, retrying...", gatewayDNSRecord.Name) | ||
| return false, nil | ||
| } | ||
|
|
||
| // Check the Published condition for each zone | ||
| for _, zone := range gatewayDNSRecord.Status.Zones { | ||
| published := false | ||
| for _, condition := range zone.Conditions { | ||
| if condition.Type == "Published" && condition.Status == "True" { | ||
| return true, nil | ||
| if condition.Type != "Published" { | ||
| continue | ||
| } | ||
| e2e.Logf("The published status is %v for zone %v", condition.Status, zone.DNSZone) | ||
| if dnsManaged && condition.Status != "True" { | ||
| e2e.Logf("DNS record %q zone %v is not published yet, retrying...", gatewayDNSRecord.Name, zone.DNSZone) | ||
| return false, nil | ||
| } | ||
| if !dnsManaged && condition.Status == "True" { | ||
| e2e.Logf("DNS record %q zone %v is unexpectedly published for unmanaged DNS, retrying...", gatewayDNSRecord.Name, zone.DNSZone) | ||
| return false, nil | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not knowing the Gateway API code, would you be OK ignoring this condition and recording as a debug/info message instead of reporting this as an error?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure I'm following: this code can never be reached in any Gateway API test scenario today. If DNS isn't configured, there are no zones to check against. Gateway API DNS is strictly on or off, there's no partial state. I think I understand some of the confusion - when #30131 was written before I added For DNS concerns, the previously merged #30946 already handles the unmanaged DNS case for Gateway API tests. The changes to
Let me know if I'm missing something, but I'd rather not change anything unnecessarily. |
||
| } | ||
| published = true | ||
| break | ||
| } | ||
| if !published { | ||
| e2e.Logf("DNS record %q zone %v has no Published condition, retrying...", gatewayDNSRecord.Name, zone.DNSZone) | ||
| return false, nil | ||
| } | ||
| } | ||
| e2e.Logf("DNS record %q is not ready, retrying...", gatewayDNSRecord.Name) | ||
| return false, nil | ||
|
|
||
| e2e.Logf("All zones are checked and DNS record %q is ready", gatewayDNSRecord.Name) | ||
| return true, nil | ||
| }) | ||
| o.Expect(err).NotTo(o.HaveOccurred(), "Timed out waiting for gateway %q DNSRecord to become ready", gatewayName) | ||
| } | ||
|
|
@@ -1040,25 +1081,29 @@ func assertHttpRouteSuccessful(oc *exutil.CLI, gwName, name string) (*gatewayapi | |
| } | ||
|
|
||
| // assertHttpRouteConnection checks if the http route of the given name replies successfully, | ||
| // and returns an error if not | ||
| func assertHttpRouteConnection(hostname string) { | ||
| // Create the http client to check the response status code. | ||
| client := &http.Client{ | ||
| Timeout: 10 * time.Second, | ||
| Transport: &http.Transport{ | ||
| TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, | ||
| }, | ||
| // and returns an error if not. This function should only be called when loadBalancerSupported is true. | ||
| func assertHttpRouteConnection(oc *exutil.CLI, gwServiceName, hostname string) { | ||
| isDNSZoneConfigured, err := isDNSZoneConfigured(oc) | ||
| if err != nil { | ||
| e2e.Failf("Failed to check if DNS zone is configured: %v", err) | ||
| } | ||
| lbAddress := "" | ||
| if isDNSZoneConfigured { | ||
| err := wait.PollUntilContextTimeout(context.Background(), 20*time.Second, dnsResolutionTimeout, false, func(context context.Context) (bool, error) { | ||
| _, err := net.LookupHost(hostname) | ||
| if err != nil { | ||
| e2e.Logf("[%v] Failed to resolve HTTP route's hostname %q: %v, retrying...", time.Now(), hostname, err) | ||
| return false, nil | ||
| } | ||
| return true, nil | ||
| }) | ||
| o.Expect(err).NotTo(o.HaveOccurred(), "Timed out waiting for HTTP route's hostname %q to be resolved: %v", hostname, err) | ||
| } else { | ||
| lbAddress = getLoadBalancerAddress(oc, gwServiceName) | ||
| } | ||
|
|
||
| err := wait.PollUntilContextTimeout(context.Background(), 20*time.Second, dnsResolutionTimeout, false, func(context context.Context) (bool, error) { | ||
| _, err := net.LookupHost(hostname) | ||
| if err != nil { | ||
| e2e.Logf("[%v] Failed to resolve HTTP route's hostname %q: %v, retrying...", time.Now(), hostname, err) | ||
| return false, nil | ||
| } | ||
| return true, nil | ||
| }) | ||
| o.Expect(err).NotTo(o.HaveOccurred(), "Timed out waiting for HTTP route's hostname %q to be resolved: %v", hostname, err) | ||
| // Create the http client to check the response status code. | ||
| client := makeHTTPClient(false, 10*time.Second, lbAddress) | ||
|
|
||
| // Wait for http route to respond, and when it does, check for the status code. | ||
| err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 5*time.Minute, false, func(context context.Context) (bool, error) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -196,7 +196,7 @@ var _ = g.Describe("[sig-network-edge][Conformance][Area:Networking][Feature:Rou | |
| pemCrt2, err := certgen.MarshalCertToPEMString(tlsCrt2Data) | ||
| o.Expect(err).NotTo(o.HaveOccurred()) | ||
|
|
||
| shardFQDN := oc.Namespace() + "." + defaultDomain | ||
| shardFQDN := oc.Namespace() + "." + baseDomain | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
set -euo pipefail
# Verify symbol usage in this file/package.
rg -n --type=go '\bdefaultDomain\b' test/extended/router/grpc-interop.go
rg -n --type=go '\bbaseDomain\b' test/extended/router/grpc-interop.go
rg -n --type=go '\bgetClusterBaseDomainName\s*\(' test/extended/routerRepository: openshift/origin Length of output: 511 Fix undefined Line 199 uses Suggested fix- defaultDomain, err := getDefaultIngressClusterDomainName(oc, time.Minute)
- o.Expect(err).NotTo(o.HaveOccurred(), "failed to find default domain name")
+ baseDomain, err := getClusterBaseDomainName(oc, time.Minute)
+ o.Expect(err).NotTo(o.HaveOccurred(), "failed to find cluster base domain name")🤖 Prompt for AI Agents |
||
|
|
||
| g.By("Creating routes to test for gRPC interoperability") | ||
| routeType := oc.Namespace() | ||
|
|
@@ -330,6 +330,15 @@ var _ = g.Describe("[sig-network-edge][Conformance][Area:Networking][Feature:Rou | |
| o.Expect(shardService).NotTo(o.BeNil()) | ||
| o.Expect(shardService.Status.LoadBalancer.Ingress).To(o.Not(o.BeEmpty())) | ||
|
|
||
| isDNSManaged, err := isDNSManaged(oc, time.Minute) | ||
| if err != nil { | ||
| e2e.Failf("Failed to get default ingresscontroller DNSManaged status: %v", err) | ||
| } | ||
| lbAddress := "" | ||
| if !isDNSManaged { | ||
| lbAddress = getLoadBalancerAddress(oc, "router-"+oc.Namespace()) | ||
| } | ||
|
|
||
| testCases := []string{ | ||
| "cancel_after_begin", | ||
| "cancel_after_first_response", | ||
|
|
@@ -352,15 +361,15 @@ var _ = g.Describe("[sig-network-edge][Conformance][Area:Networking][Feature:Rou | |
| routev1.TLSTerminationReencrypt, | ||
| routev1.TLSTerminationPassthrough, | ||
| } { | ||
| err := grpcExecTestCases(oc, routeType, 5*time.Minute, testCases...) | ||
| err := grpcExecTestCases(oc, routeType, 5*time.Minute, lbAddress, testCases...) | ||
| o.Expect(err).NotTo(o.HaveOccurred()) | ||
| } | ||
| }) | ||
| }) | ||
| }) | ||
|
|
||
| // grpcExecTestCases run gRPC interop test cases. | ||
| func grpcExecTestCases(oc *exutil.CLI, routeType routev1.TLSTerminationType, timeout time.Duration, testCases ...string) error { | ||
| func grpcExecTestCases(oc *exutil.CLI, routeType routev1.TLSTerminationType, timeout time.Duration, lbAddress string, testCases ...string) error { | ||
| host, err := getHostnameForRoute(oc, fmt.Sprintf("grpc-interop-%s", routeType)) | ||
| if err != nil { | ||
| return err | ||
|
|
@@ -371,6 +380,7 @@ func grpcExecTestCases(oc *exutil.CLI, routeType routev1.TLSTerminationType, tim | |
| Port: 443, | ||
| UseTLS: true, | ||
| Insecure: true, | ||
| Target: lbAddress, | ||
| } | ||
|
|
||
| if routeType == "h2c" { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So we are now enabling
assertDNSRecordStatusto check DNSRecords for unmanaged clusters in this PR - however, this logic will never pass because (see comment above) Gateway API will be unmanaged only when there are no zones. So this will get stuck.I think we have two options:
assertDNSRecordStatusfor unsupported DNS clusters (and remove all changes toassertDNSRecordStatus)dnsManagedhere, and return early - there are no zones to checkGateway API DNS logic is simpler than IngressController: the
DNSRecordis always created, and whether it actually publishes depends only on whether zones exist and the domain matches baseDomain. There's no per-Gateway opt-out mechanism like the IngressController'sdnsManagementPolicyorendpointPublishingStrategyfields.