Commit 548a3fe8 authored by Mudassar Khan's avatar Mudassar Khan
Browse files

fix: mTLS issue where the connection was reset by the peer while publishing a service

parent 7e895d8b
Loading
Loading
Loading
Loading
+67 −17
Original line number Diff line number Diff line
@@ -39,6 +39,10 @@ import (
const (
	// NOTE(review): maxRetries and retryBaseWait are not referenced in the visible
	// hunks; presumably they bound a separate retry/backoff loop elsewhere in this
	// file — confirm against their call sites.
	maxRetries    = 6
	retryBaseWait = 10 * time.Second
	// maxMTLSAttempts is the maximum number of attempts (the first try included) made
	// for an mTLS request when the connection is reset; it is the upper bound of the
	// attempt loops in doPublish and UnpublishServiceAPI.
	// The CAPIF server sits behind a load balancer where not all backends support mTLS;
	// a fresh TCP connection lets the LB route to a capable backend.
	maxMTLSAttempts = 3
)

// CapifClient manages the lifecycle of a CAPIF provider session
@@ -458,7 +462,8 @@ func (c *CapifClient) onboardProvider() error {
	return nil
}

// doPublish performs a single publish attempt using a snapshot of the current session state.
// doPublish performs a publish attempt, retrying on connection reset up to maxMTLSAttempts times.
// Each retry builds a fresh HTTP client to force a new TCP connection through the load balancer.
func (c *CapifClient) doPublish(apiName string, version string, resourceURI string, resourceName string, description string, interfaceAddr string, interfacePort int) (string, error) {
	c.mu.RLock()
	if !c.ready {
@@ -469,6 +474,8 @@ func (c *CapifClient) doPublish(apiName string, version string, resourceURI stri
	aefId := c.aefId
	publishUrl := c.ccfPublishUrl
	apfTLSClient := c.apfTLSClient
	apfCert := c.apfCert
	apfKey := c.apfKey
	c.mu.RUnlock()

	// Build the publish URL, replacing <apfId> placeholder if present
@@ -529,15 +536,35 @@ func (c *CapifClient) doPublish(apiName string, version string, resourceURI stri
		return "", err
	}

	req, err := http.NewRequest("POST", url, bytes.NewReader(bodyBytes))
	if err != nil {
		return "", err
	// Retry loop: on connection reset, build a fresh client to reach a different LB backend.
	var resp *http.Response
	var lastErr error
	for attempt := 1; attempt <= maxMTLSAttempts; attempt++ {
		var req *http.Request
		req, lastErr = http.NewRequest("POST", url, bytes.NewReader(bodyBytes))
		if lastErr != nil {
			return "", lastErr
		}
		req.Header.Set("Content-Type", "application/json")

	resp, err := apfTLSClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("publish request failed: %w", err)
		client := apfTLSClient
		if attempt > 1 {
			if freshClient, buildErr := buildMutualTLSClient(apfCert, apfKey, c.capifDialContext); buildErr == nil {
				client = freshClient
			}
		}

		resp, lastErr = client.Do(req)
		if lastErr == nil {
			break
		}
		if !strings.Contains(lastErr.Error(), "connection reset") {
			return "", fmt.Errorf("publish request failed: %w", lastErr)
		}
		log.Warn(fmt.Sprintf("CAPIF publish mTLS attempt %d/%d connection reset, retrying: %v", attempt, maxMTLSAttempts, lastErr))
	}
	if lastErr != nil {
		return "", fmt.Errorf("publish request failed after %d attempts: %w", maxMTLSAttempts, lastErr)
	}
	defer resp.Body.Close()

@@ -606,21 +633,44 @@ func (c *CapifClient) UnpublishServiceAPI(apiId string) error {
	}
	apfId := c.apfId
	publishUrl := c.ccfPublishUrl
	apfTLSClient := c.apfTLSClient
	apfCert := c.apfCert
	apfKey := c.apfKey
	c.mu.RUnlock()

	publishPath := strings.Replace(publishUrl, "<apfId>", apfId, 1)
	url := fmt.Sprintf("https://%s:%s/%s/%s", c.config.CapifHostname, c.config.CapifPort, publishPath, apiId)
	log.Debug("CAPIF unpublishServiceAPI: ", url)

	req, err := http.NewRequest("DELETE", url, nil)
	if err != nil {
		return err
	// Retry loop: on connection reset, build a fresh client to reach a different LB backend.
	var resp *http.Response
	var lastErr error
	for attempt := 1; attempt <= maxMTLSAttempts; attempt++ {
		var req *http.Request
		req, lastErr = http.NewRequest("DELETE", url, nil)
		if lastErr != nil {
			return lastErr
		}
		req.Header.Set("Content-Type", "application/json")

	resp, err := c.apfTLSClient.Do(req)
	if err != nil {
		return fmt.Errorf("unpublish request failed: %w", err)
		client := apfTLSClient
		if attempt > 1 {
			if freshClient, buildErr := buildMutualTLSClient(apfCert, apfKey, c.capifDialContext); buildErr == nil {
				client = freshClient
			}
		}

		resp, lastErr = client.Do(req)
		if lastErr == nil {
			break
		}
		if !strings.Contains(lastErr.Error(), "connection reset") {
			return fmt.Errorf("unpublish request failed: %w", lastErr)
		}
		log.Warn(fmt.Sprintf("CAPIF unpublish mTLS attempt %d/%d connection reset, retrying: %v", attempt, maxMTLSAttempts, lastErr))
	}
	if lastErr != nil {
		return fmt.Errorf("unpublish request failed after %d attempts: %w", maxMTLSAttempts, lastErr)
	}
	defer resp.Body.Close()