Skip to content

Commit 9e7f5b8

Browse files
committed
Fixed format
1 parent fc1e8db commit 9e7f5b8

File tree

4 files changed

+55
-25
lines changed

4 files changed

+55
-25
lines changed

cmd/whisper/transcribe.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
types "github.com/mutablelogic/go-server/pkg/types"
1414
whisper "github.com/mutablelogic/go-whisper"
1515
client "github.com/mutablelogic/go-whisper/pkg/client"
16+
"github.com/mutablelogic/go-whisper/pkg/client/openai"
1617
schema "github.com/mutablelogic/go-whisper/pkg/schema"
1718
task "github.com/mutablelogic/go-whisper/pkg/task"
1819
wav "github.com/mutablelogic/go-whisper/pkg/wav"
@@ -155,7 +156,7 @@ func (cmd *TranslateCmd) run_remote(app *Globals, translate bool) error {
155156

156157
// Create an array of parameters for the transcription
157158
params := []client.Opt{
158-
client.OptPath("audio.wav"), client.OptFormat("json"), client.OptLanguage(cmd.Language),
159+
client.OptPath("audio.wav"), client.OptFormat(openai.FormatVerboseJson), client.OptLanguage(cmd.Language),
159160
}
160161
if cmd.Diarize {
161162
params = append(params, client.OptDiarize())

pkg/api/transcribe.go

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ package api
22

33
import (
44
"context"
5+
"fmt"
56
"io"
67
"net/http"
8+
"slices"
79
"strings"
810
"time"
911

@@ -14,6 +16,7 @@ import (
1416
"github.com/mutablelogic/go-server/pkg/types"
1517
"github.com/mutablelogic/go-whisper"
1618
"github.com/mutablelogic/go-whisper/pkg/client/gowhisper"
19+
"github.com/mutablelogic/go-whisper/pkg/client/openai"
1720
"github.com/mutablelogic/go-whisper/pkg/schema"
1821
"github.com/mutablelogic/go-whisper/pkg/task"
1922
)
@@ -34,6 +37,16 @@ func TranscribeFile(ctx context.Context, service *whisper.Whisper, w http.Respon
3437
return httpresponse.Error(w, httpresponse.ErrNotFound, req.Model)
3538
}
3639

40+
// Check the format
41+
format := strings.TrimSpace(types.PtrString(req.Format))
42+
if format == "" {
43+
format = openai.Formats[0] // Default to first format
44+
} else if !slices.Contains(openai.Formats, format) {
45+
return httpresponse.Error(w, httpresponse.ErrBadRequest.Withf("Unsupported format: %q", format))
46+
} else {
47+
fmt.Println("TranscribeFile: format:", format)
48+
}
49+
3750
// Start a transcription task
3851
var result *schema.Transcription
3952
if err := service.WithModel(model, func(taskctx *task.Context) error {
@@ -71,7 +84,7 @@ func TranscribeFile(ctx context.Context, service *whisper.Whisper, w http.Respon
7184
}
7285

7386
// Respond to client
74-
return response(w, types.PtrString(req.Format), result)
87+
return response(w, format, result)
7588
}
7689

7790
func TranslateFile(ctx context.Context, service *whisper.Whisper, w http.ResponseWriter, r *http.Request) error {
@@ -87,6 +100,16 @@ func TranslateFile(ctx context.Context, service *whisper.Whisper, w http.Respons
87100
return httpresponse.Error(w, httpresponse.ErrNotFound, req.Model)
88101
}
89102

103+
// Check the format
104+
format := strings.TrimSpace(types.PtrString(req.Format))
105+
if format == "" {
106+
format = openai.Formats[0] // Default to first format
107+
} else if !slices.Contains(openai.Formats, format) {
108+
return httpresponse.Error(w, httpresponse.ErrBadRequest.Withf("Unsupported format: %q", format))
109+
} else {
110+
fmt.Println("TranscribeFile: format:", format)
111+
}
112+
90113
// Cannot diarize when translating
91114
if req.Diarize != nil {
92115
return httpresponse.Error(w, httpresponse.ErrBadRequest, "Cannot diarize when translating")
@@ -124,7 +147,7 @@ func TranslateFile(ctx context.Context, service *whisper.Whisper, w http.Respons
124147
}
125148

126149
// Respond to client
127-
return response(w, types.PtrString(req.Format), result)
150+
return response(w, format, result)
128151
}
129152

130153
func segment(ctx context.Context, taskctx *task.Context, r io.Reader, fn func(seg *schema.Segment)) error {
@@ -145,30 +168,22 @@ func segment(ctx context.Context, taskctx *task.Context, r io.Reader, fn func(se
145168
return nil
146169
}
147170

148-
const (
149-
FormatJson = "json"
150-
FormatVerboseJson = "verbose_json"
151-
FormatText = "text"
152-
FormatSrt = "srt"
153-
FormatVtt = "vtt"
154-
)
155-
156171
func response(w http.ResponseWriter, format string, response *schema.Transcription) error {
157172
switch strings.ToLower(format) {
158-
case FormatJson, FormatVerboseJson:
173+
case openai.FormatJson, openai.FormatVerboseJson:
159174
return httpresponse.JSON(w, http.StatusOK, 2, response)
160-
case FormatText, "":
175+
case openai.FormatText, "":
161176
return httpresponse.Write(w, http.StatusOK, types.ContentTypeTextPlain, func(w io.Writer) (int, error) {
162177
return w.Write([]byte(response.Text))
163178
})
164-
case FormatSrt:
179+
case openai.FormatSrt:
165180
return httpresponse.Write(w, http.StatusOK, "application/x-subrip", func(w io.Writer) (int, error) {
166181
for _, seg := range response.Segments {
167182
task.WriteSegmentSrt(w, seg)
168183
}
169184
return 0, nil
170185
})
171-
case FormatVtt:
186+
case openai.FormatVtt:
172187
return httpresponse.Write(w, http.StatusOK, "text/vtt", func(w io.Writer) (int, error) {
173188
if _, err := w.Write([]byte("WEBVTT\n\n")); err != nil {
174189
return 0, err

pkg/client/openai/schema.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,14 +62,18 @@ const (
6262
)
6363

6464
const (
65-
FormatJson = "verbose_json"
66-
FormatText = "text"
67-
FormatSrt = "srt"
68-
FormatVtt = "vtt"
65+
FormatJson = "json"
66+
FormatVerboseJson = "verbose_json"
67+
FormatText = "text"
68+
FormatSrt = "srt"
69+
FormatVtt = "vtt"
6970
)
7071

7172
var (
72-
Models = []string{"whisper-1"} // Supported models for transcription and translation
73+
Models = []string{"whisper-1"} // Supported models for transcription and translation
74+
Formats = []string{
75+
FormatJson, FormatVerboseJson, FormatText, FormatSrt, FormatVtt,
76+
}
7377
)
7478

7579
/////////////////////////////////////////////////////////////////////////////////

pkg/client/opts.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,24 @@ func OptLanguage(language string) Opt {
8989
}
9090
}
9191

92-
// Set format for transcription (json, srt, vtt, text)
92+
// Set format for transcription (json, verbose_json, srt, vtt, text)
9393
func OptFormat(v string) Opt {
9494
return func(api apitype, o *opts) error {
95-
// Convert json to verbose format
96-
if v == "json" {
97-
v = openai.FormatJson
95+
// Check format
96+
if !slices.Contains(openai.Formats, v) {
97+
return httpresponse.ErrBadRequest.Withf("format %q not supported", v)
9898
}
99-
o.openai.Format = types.StringPtr(v)
99+
100+
// Set format
101+
switch api {
102+
case apiopenai, apigowhisper:
103+
o.gowhisper.Format = types.StringPtr(v)
104+
o.openai.Format = types.StringPtr(v)
105+
default:
106+
return httpresponse.ErrBadRequest.Withf("format %q not supported", v)
107+
}
108+
109+
// Return success
100110
return nil
101111
}
102112
}

0 commit comments

Comments
 (0)