Skip to content

Enabling baggage cache to support limits and non-ascii characters #8713

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 7, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public BaggagePropagator(boolean injectBaggage, boolean extractBaggage) {
public <C> void inject(Context context, C carrier, CarrierSetter<C> setter) {
int maxItems = this.config.getTraceBaggageMaxItems();
int maxBytes = this.config.getTraceBaggageMaxBytes();
//noinspection ConstantValue
// noinspection ConstantValue
if (!this.injectBaggage
|| maxItems == 0
|| maxBytes == 0
Expand Down Expand Up @@ -108,7 +108,8 @@ public <C> Context extract(Context context, C carrier, CarrierVisitor<C> visitor
if (!this.extractBaggage || context == null || carrier == null || visitor == null) {
return context;
}
BaggageExtractor baggageExtractor = new BaggageExtractor();
BaggageExtractor baggageExtractor =
new BaggageExtractor(config.getTraceBaggageMaxItems(), config.getTraceBaggageMaxBytes());
visitor.forEachKeyValue(carrier, baggageExtractor);
return baggageExtractor.extracted == null ? context : context.with(baggageExtractor.extracted);
}
Expand All @@ -117,8 +118,15 @@ private static class BaggageExtractor implements BiConsumer<String, String> {
private static final char KEY_VALUE_SEPARATOR = '=';
private static final char PAIR_SEPARATOR = ',';
private Baggage extracted;

private BaggageExtractor() {}
private final int maxItems;
private final int maxBytes;
private String w3cHeader;

private BaggageExtractor(int maxItems, int maxBytes) {
this.maxItems = maxItems;
this.maxBytes = maxBytes;
this.w3cHeader = null;
}

/** URL decode value */
private String decode(final String value) {
Expand All @@ -134,6 +142,7 @@ private String decode(final String value) {
private Map<String, String> parseBaggageHeaders(String input) {
Map<String, String> baggage = new HashMap<>();
int start = 0;
boolean truncatedCache = false;
int pairSeparatorInd = input.indexOf(PAIR_SEPARATOR);
pairSeparatorInd = pairSeparatorInd == -1 ? input.length() : pairSeparatorInd;
int kvSeparatorInd = input.indexOf(KEY_VALUE_SEPARATOR);
Expand All @@ -152,11 +161,29 @@ private Map<String, String> parseBaggageHeaders(String input) {
}
baggage.put(key, value);

// need to percent-encode non-ascii headers we pass down
if (UTF_ESCAPER.keyNeedsEncoding(key) || UTF_ESCAPER.valNeedsEncoding(value)) {
truncatedCache = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It’s not strictly truncated, it’s invalid in this case as non ascii characters are not allowed in HTTP headers. I though the RFC asked to drop the header and not parse it if it was invalid.

From RFC encoding:

In other words, the HTTP baggage header is encoded using only ASCII characters.

From RFC error handling:

When this occurs, the APM SDK should not try to extract any values from the entire header. In other words, APM SDKs should ignore the header and not try to extract "good" values while ignoring "bad" ones.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would argue that this isn't an invalid case and should be something that we can support since HTTP clients technically can send non-ascii characters as headers and we do support non-ascii characters as baggage. I would say that the invalid case is more for parsing issues with not being able to figure out what baggage should be used.

this.w3cHeader = null;
} else if (!truncatedCache && (end > this.maxBytes || baggage.size() > this.maxItems)) {
if(start == 0) { // if we go out of range after first k/v pair, there is no cache
this.w3cHeader = null;
} else{
this.w3cHeader = input.substring(0, start - 1); // -1 to ignore the k/v separator
}
truncatedCache = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should break here if you reach max size or max bytes.
It will simply the if clause too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even if we don't cache here, we still need to maintain all of the raw baggage data so we can't break.

}

kvSeparatorInd = input.indexOf(KEY_VALUE_SEPARATOR, pairSeparatorInd + 1);
pairSeparatorInd = input.indexOf(PAIR_SEPARATOR, pairSeparatorInd + 1);
pairSeparatorInd = pairSeparatorInd == -1 ? input.length() : pairSeparatorInd;
start = end + 1;
}

if (!truncatedCache) {
this.w3cHeader = input;
}

return baggage;
}

Expand All @@ -166,7 +193,7 @@ public void accept(String key, String value) {
if (BAGGAGE_KEY.equalsIgnoreCase(key)) {
Map<String, String> baggage = parseBaggageHeaders(value);
if (!baggage.isEmpty()) {
this.extracted = Baggage.create(baggage, value);
this.extracted = Baggage.create(baggage, this.w3cHeader);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,41 @@ public Escaped escapeValue(String s) {
return escape(s, unsafeValOctets);
}

public boolean needsEncoding(char c, boolean[] unsafeOctets) {
if (c > '~' || c <= ' ' || c < unsafeOctets.length && unsafeOctets[c]) {
return true;
}
return false;
}

public boolean keyNeedsEncoding(String key) {
int slen = key.length();
for (int index = 0; index < slen; index++) {
char c = key.charAt(index);
if (needsEncoding(c, unsafeKeyOctets)) {
return true;
}
}
return false;
}

public boolean valNeedsEncoding(String val) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I included both of these to avoid checking which unsafe characters to use for each string we decode. Let me know if that is preferred for code-conciseness

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No that’s good 👍
But you can make it more concise if you like by still having the public boolean keyNeedsEncoding(String key) and public boolean valNeedsEncoding(String val) that calls an private method public boolean needsEncoding(String s, boolean[] unsafeOctets) and do all the logic / iteration on this single method (the length, for, charAt, etc...).

int slen = val.length();
for (int index = 0; index < slen; index++) {
char c = val.charAt(index);
if (needsEncoding(c, unsafeValOctets)) {
return true;
}
}
return false;
}

/** Escape the provided String, using percent-style URL Encoding. */
public Escaped escape(String s, boolean[] unsafeOctets) {
int slen = s.length();
for (int index = 0; index < slen; index++) {
char c = s.charAt(index);
if (c > '~' || c <= ' ' || c <= unsafeOctets.length && unsafeOctets[c]) {
if (needsEncoding(c, unsafeOctets)) {
return escapeSlow(s, index, unsafeOctets);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class BaggagePropagatorTest extends DDSpecification {
["abcdefg": "hijklmnopq♥"] | "abcdefg=hijklmnopq%E2%99%A5"
}

def "test baggage item limit"() {
def "test baggage inject item limit"() {
setup:
injectSysConfig("trace.baggage.max.items", '2')
propagator = new BaggagePropagator(true, true) //creating a new instance after injecting config
Expand All @@ -79,7 +79,7 @@ class BaggagePropagatorTest extends DDSpecification {
[key1: "val1", key2: "val2", key3: "val3"] | "key1=val1,key2=val2"
}

def "test baggage bytes limit"() {
def "test baggage inject bytes limit"() {
setup:
injectSysConfig("trace.baggage.max.bytes", '20')
propagator = new BaggagePropagator(true, true) //creating a new instance after injecting config
Expand Down Expand Up @@ -139,7 +139,7 @@ class BaggagePropagatorTest extends DDSpecification {
"=" | _
}

def "testing baggage cache"(){
def "test baggage cache"(){
setup:
def headers = [
(BAGGAGE_KEY) : baggageHeader,
Expand All @@ -150,17 +150,54 @@ class BaggagePropagatorTest extends DDSpecification {

then:
Baggage baggageContext = Baggage.fromContext(context)
baggageContext.asMap() == baggageMap
baggageContext.w3cHeader == cachedString

where:
baggageHeader | cachedString
"key1=val1,key2=val2,foo=bar" | "key1=val1,key2=val2,foo=bar"
'";\\()/:<=>?@[]{}=";\\' | null
}

def "test baggage cache items limit"(){
setup:
injectSysConfig("trace.baggage.max.items", "2")
propagator = new BaggagePropagator(true, true) //creating a new instance after injecting config
def headers = [
(BAGGAGE_KEY) : baggageHeader,
]

when:
this.propagator.inject(context, carrier, setter)
context = this.propagator.extract(context, headers, ContextVisitors.stringValuesMap())

then:
assert carrier[BAGGAGE_KEY] == baggageHeader
Baggage baggageContext = Baggage.fromContext(context)
baggageContext.getW3cHeader() as String == cachedString

where:
baggageHeader | baggageMap
"key1=val1,key2=val2,foo=bar" | ["key1": "val1", "key2": "val2", "foo": "bar"]
"%22%2C%3B%5C%28%29%2F%3A%3C%3D%3E%3F%40%5B%5D%7B%7D=%22%2C%3B%5C" | ['",;\\()/:<=>?@[]{}': '",;\\']
baggageHeader | cachedString
"key1=val1,key2=val2" | "key1=val1,key2=val2"
"key1=val1,key2=val2,key3=val3" | "key1=val1,key2=val2"
"key1=val1,key2=val2,key3=val3,key4=val4" | "key1=val1,key2=val2"
}

def "test baggage cache bytes limit"(){
setup:
injectSysConfig("trace.baggage.max.bytes", "20")
propagator = new BaggagePropagator(true, true) //creating a new instance after injecting config
def headers = [
(BAGGAGE_KEY) : baggageHeader,
]

when:
context = this.propagator.extract(context, headers, ContextVisitors.stringValuesMap())

then:
Baggage baggageContext = Baggage.fromContext(context)
baggageContext.getW3cHeader() as String == cachedString

where:
baggageHeader | cachedString
"key1=val1,key2=val2" | "key1=val1,key2=val2"
"key1=val1,key2=val2,key3=val3" | "key1=val1,key2=val2"
}
}