Skip to content

Commit a7b8dc9

Browse files
authored
feat: The "session request" is no longer managed by the client. The hosting application is now expected to manage the session request. This has proven to be a cleaner and more reliable API for us in production.
BREAKING CHANGE: The sessionRequested argument is no longer part of the constructor. The sessionRequested argument is no longer provided to the getRealtimeEphemeralAPIKey callback. The host application should know what session it wants and can always request whatever session it wants in the getRealtimeEphemeralAPIKey callback. See the WebRTC example for how to do this.
1 parent cf4bc92 commit a7b8dc9

File tree

3 files changed

+57
-79
lines changed

3 files changed

+57
-79
lines changed

apps/browser-example/src/pages/WebRTCExample.tsx

+28-11
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@ import {
55
} from "../components/RealtimeSessionView"
66
import { RealtimeClient } from "@tsorta/browser/WebRTC"
77
import { PageProps } from "./props"
8-
import { RealtimeConversationItem } from "@tsorta/browser/openai"
8+
import {
9+
RealtimeConversationItem,
10+
RealtimeSessionCreateResponse,
11+
} from "@tsorta/browser/openai"
912

1013
export function WebRTCExample({
1114
apiKey,
@@ -37,17 +40,25 @@ export function WebRTCExample({
3740

3841
const client = new RealtimeClient(
3942
navigator,
40-
// @ts-expect-error TS6133: 'sessionRequested' is declared but its value is never read.
41-
({ sessionRequested }) => {
43+
async () => {
4244
// NOTE: For the sake of the example, we're using a "real" OpenAI API
43-
// key rather than a Realtime API Session ephemeral key, as you
44-
// should do in a production app. So this sessionRequested argument
45-
// isn't useful in the example, but in a production app you can use
46-
// it to request a session with the these parameters.
47-
return apiKey
45+
// key in *the browser*. **DO NOT DO THIS**. You should make this request
46+
// for the ephemeral key on a backend server where you can protect
47+
// the key.
48+
49+
const r = await fetch("https://api.openai.com/v1/realtime/sessions", {
50+
method: "POST",
51+
headers: {
52+
Authorization: `Bearer ${apiKey}`,
53+
"Content-Type": "application/json",
54+
},
55+
body: JSON.stringify(sessionRequest),
56+
})
57+
const data = (await r.json()) as RealtimeSessionCreateResponse
58+
59+
return data.client_secret.value
4860
},
49-
audioElementRef.current,
50-
sessionRequest
61+
audioElementRef.current
5162
)
5263
setClient(client)
5364

@@ -59,7 +70,13 @@ export function WebRTCExample({
5970
setConversation(event.conversation)
6071
})
6172

62-
await client.start()
73+
try {
74+
await client.start()
75+
} catch (e) {
76+
// TODO: put an alert on the top to show error
77+
console.error("Error starting session", e)
78+
return
79+
}
6380

6481
onSessionStatusChanged("recording")
6582
},

packages/browser/src/WebRTC/RealtimeClient.ts

+26-68
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,6 @@ const RealtimeClientDefaultOptions: RealtimeClientOptions = {
5353
baseUrl: "https://api.openai.com/v1/realtime",
5454
}
5555

56-
interface EphemeralApiKeyOptions {
57-
sessionRequested: RealtimeSessionCreateRequest
58-
}
5956
/**
6057
* A TypeScript client for the OpenAI Realtime API using WebRTC in the browser.
6158
*/
@@ -78,16 +75,12 @@ export class RealtimeClient {
7875
/**
7976
* Create a new client.
8077
* @param getRealtimeEphemeralAPIKey This is a function that you should implement to return the Ephemeral OpenAI API key that is used to authenticate with the OpenAI Realtime API. It should be an ephemeral key as described at https://platform.openai.com/docs/guides/realtime-webrtc#creating-an-ephemeral-token. You will probably need to make a call to your server here to fetch the key.
81-
* @param sessionRequested The session parameters you want from the Realtime API. If these are found to be different it will re-request them to try to match this session.
8278
*/
8379
constructor(
8480
private readonly navigator: Navigator,
85-
private readonly getRealtimeEphemeralAPIKey: (
86-
options: EphemeralApiKeyOptions
87-
) => Promise<string> | string,
81+
private readonly getRealtimeEphemeralAPIKey: () => Promise<string> | string,
8882
private readonly audioElement: HTMLAudioElement,
89-
private readonly sessionRequested: RealtimeSessionCreateRequest,
90-
options: Partial<RealtimeClientOptions> = RealtimeClientDefaultOptions
83+
options: Partial<RealtimeClientOptions> = RealtimeClientDefaultOptions,
9184
) {
9285
const opt = { ...RealtimeClientDefaultOptions, ...options }
9386
this.recordedAudioChunkDuration = opt.recordedAudioChunkDuration
@@ -101,7 +94,7 @@ export class RealtimeClient {
10194
*/
10295
public addEventListener<TEventName extends keyof RealtimeClientEventMap>(
10396
event: TEventName,
104-
listener: EventTargetListener<RealtimeClientEventMap[TEventName]>
97+
listener: EventTargetListener<RealtimeClientEventMap[TEventName]>,
10598
): void {
10699
this.emitter.addEventListener(event, listener)
107100
}
@@ -204,15 +197,15 @@ export class RealtimeClient {
204197
this.audioChunks.push(...audioChunks)
205198
this.emitter.dispatchTypedEvent(
206199
"recordedAudioChanged",
207-
new RecordedAudioChangedEvent(this.audioChunks)
200+
new RecordedAudioChangedEvent(this.audioChunks),
208201
)
209202
}
210203

211204
private setRecordedAudio(audioChunks: Blob[]) {
212205
this.audioChunks = audioChunks
213206
this.emitter.dispatchTypedEvent(
214207
"recordedAudioChanged",
215-
new RecordedAudioChangedEvent(this.audioChunks)
208+
new RecordedAudioChangedEvent(this.audioChunks),
216209
)
217210
}
218211

@@ -225,9 +218,7 @@ export class RealtimeClient {
225218

226219
let apiKey: string
227220
try {
228-
apiKey = await this.getRealtimeEphemeralAPIKey({
229-
sessionRequested: this.sessionRequested,
230-
})
221+
apiKey = await this.getRealtimeEphemeralAPIKey()
231222
} catch (err) {
232223
throw new Error("getRealtimeEphemeralAPIKey handler failed.", {
233224
cause: err,
@@ -274,7 +265,7 @@ export class RealtimeClient {
274265
// Listen for server-sent events on the data channel
275266
this.dataChannel.addEventListener(
276267
"message",
277-
this.receiveServerMessage.bind(this)
268+
this.receiveServerMessage.bind(this),
278269
)
279270
this.dataChannel.addEventListener("error", (e) => {
280271
log.error("Data channel error from server: %o", e.error)
@@ -308,7 +299,7 @@ export class RealtimeClient {
308299
this.session = undefined
309300
this.emitter.dispatchTypedEvent(
310301
"sessionUpdated",
311-
new SessionUpdatedEvent(this.session)
302+
new SessionUpdatedEvent(this.session),
312303
)
313304
}
314305
}
@@ -323,7 +314,7 @@ export class RealtimeClient {
323314

324315
this.emitter.dispatchTypedEvent(
325316
"serverEvent",
326-
new RealtimeServerEventEvent(parsedEvent)
317+
new RealtimeServerEventEvent(parsedEvent),
327318
)
328319
}
329320

@@ -383,49 +374,15 @@ export class RealtimeClient {
383374
client.session = sessionEvent.session
384375
client.emitter.dispatchTypedEvent(
385376
"sessionCreated",
386-
new SessionCreatedEvent(sessionEvent.session)
377+
new SessionCreatedEvent(sessionEvent.session),
387378
)
388-
389-
if (!client.sessionRequested) {
390-
throw new Error("No session request")
391-
}
392-
393-
// NOTE: When we create a session with OpenAI, it ignores things like input_audio_transcription?.model !== "whisper-1"; So we update it again if it doesn't match the session.
394-
let updatedSession: RealtimeSessionCreateRequest = {
395-
...client.sessionRequested,
396-
}
397-
let hasSessionMismatch = false
398-
399-
for (const key of Object.keys(client.sessionRequested) as Array<
400-
keyof RealtimeSessionCreateRequest
401-
>) {
402-
const requestValue = client.sessionRequested[key]
403-
const sessionValue = sessionEvent.session[key]
404-
405-
if (compareValuesIgnoreNullProperties(requestValue, sessionValue)) {
406-
continue
407-
}
408-
log.debug(
409-
`session mismatch on ${key}: %o !== %o`,
410-
requestValue,
411-
sessionValue
412-
)
413-
hasSessionMismatch = true
414-
}
415-
if (hasSessionMismatch) {
416-
const updateSessionEvent: RealtimeClientEventSessionUpdate = {
417-
type: "session.update",
418-
session: updatedSession,
419-
}
420-
client.sendClientEvent(updateSessionEvent)
421-
}
422379
},
423380
"session.updated": (client, event) => {
424381
const sessionEvent = event as RealtimeServerEventSessionUpdated
425382
client.session = sessionEvent.session
426383
client.emitter.dispatchTypedEvent(
427384
"sessionUpdated",
428-
new SessionUpdatedEvent(sessionEvent.session)
385+
new SessionUpdatedEvent(sessionEvent.session),
429386
)
430387
},
431388
"conversation.item.created": (client, event) => {
@@ -434,7 +391,7 @@ export class RealtimeClient {
434391
client.conversation.push(conversationEvent.item)
435392
client.emitter.dispatchTypedEvent(
436393
"conversationChanged",
437-
new ConversationChangedEvent(client.conversation)
394+
new ConversationChangedEvent(client.conversation),
438395
)
439396
},
440397
"response.audio_transcript.delta": (client, event) => {
@@ -445,7 +402,7 @@ export class RealtimeClient {
445402
client.conversation,
446403
deltaEvent.item_id,
447404
deltaEvent.content_index,
448-
deltaEvent
405+
deltaEvent,
449406
)
450407
if (!foundItem) {
451408
// error was logged in findConversationItemContent
@@ -462,15 +419,15 @@ export class RealtimeClient {
462419
} else {
463420
if (foundContent.type !== "input_audio") {
464421
log.error(
465-
`${event.type} Unexpected content type ${foundContent.type} for audio transcript`
422+
`${event.type} Unexpected content type ${foundContent.type} for audio transcript`,
466423
)
467424
return
468425
}
469426
foundContent.transcript += deltaEvent.delta
470427
}
471428
client.emitter.dispatchTypedEvent(
472429
"conversationChanged",
473-
new ConversationChangedEvent(client.conversation)
430+
new ConversationChangedEvent(client.conversation),
474431
)
475432
},
476433
"response.text.delta": (client, event) => {
@@ -500,15 +457,15 @@ export class RealtimeClient {
500457
{ log },
501458
client.conversation,
502459
output.id!,
503-
event
460+
event,
504461
)
505462
if (!conversationItem) {
506463
// TODO: findConversationItem already logged an error, we should probably pass in a value that tells it not to log
507464
// no existing item is there, for some reason maybe we missed it in the stream somehow? We'll just add it:
508465
client.conversation.push(output)
509466
client.emitter.dispatchTypedEvent(
510467
"conversationChanged",
511-
new ConversationChangedEvent(client.conversation)
468+
new ConversationChangedEvent(client.conversation),
512469
)
513470
continue
514471
}
@@ -523,43 +480,44 @@ export class RealtimeClient {
523480
// force update the conversation state:
524481
client.emitter.dispatchTypedEvent(
525482
"conversationChanged",
526-
new ConversationChangedEvent(client.conversation)
483+
new ConversationChangedEvent(client.conversation),
527484
)
528485
}
529486
},
530487
"response.audio_transcript.done": (client, event) => {
531488
patchConversationItemWithCompletedTranscript(
532489
{ log },
533490
client.conversation,
534-
event as RealtimeServerEventResponseAudioTranscriptDone
491+
event as RealtimeServerEventResponseAudioTranscriptDone,
535492
)
536493
client.emitter.dispatchTypedEvent(
537494
"conversationChanged",
538-
new ConversationChangedEvent(client.conversation)
495+
new ConversationChangedEvent(client.conversation),
539496
)
540497
},
541498
"conversation.item.input_audio_transcription.completed": (
542499
client,
543-
event
500+
event,
544501
) => {
545502
patchConversationItemWithCompletedTranscript(
546503
{ log },
547504
client.conversation,
548-
event
505+
event,
549506
)
550507
client.emitter.dispatchTypedEvent(
551508
"conversationChanged",
552-
new ConversationChangedEvent(client.conversation)
509+
new ConversationChangedEvent(client.conversation),
553510
)
554511
},
555512
}
556513
}
557514

558515
type RealtimeServerEventHandler<
559-
TRealtimeServerEventType extends RealtimeServerEvent["type"] = RealtimeServerEvent["type"]
516+
TRealtimeServerEventType extends
517+
RealtimeServerEvent["type"] = RealtimeServerEvent["type"],
560518
> = (
561519
client: RealtimeClient,
562-
event: Extract<RealtimeServerEvent, { type: TRealtimeServerEventType }>
520+
event: Extract<RealtimeServerEvent, { type: TRealtimeServerEventType }>,
563521
) => void
564522

565523
type RealtimeServerEventNames = RealtimeServerEvent["type"]

packages/browser/src/openai/index.ts

+3
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ export type RealtimeSession = components["schemas"]["RealtimeSession"]
9696
export type RealtimeSessionCreateRequest =
9797
components["schemas"]["RealtimeSessionCreateRequest"]
9898

99+
export type RealtimeSessionCreateResponse =
100+
components["schemas"]["RealtimeSessionCreateResponse"]
101+
99102
export type RealTimeSessionModels = RealtimeSessionCreateRequest["model"]
100103

101104
/** Part of the @see RealtimeServerEventResponseDone event and others.

0 commit comments

Comments
 (0)