Skip to content

Commit 81eb911

Browse files
author
Uoti Urpala
committed
subassconvert: make subrip attribute parsing more robust
Add general code to separate the HTML-like attribute=value syntax used in srt font tags into attribute and value parts. This simplifies some of the parsing code, makes detection of malformed input more robust, and allows warning about unrecognized attributes.
1 parent f0ce956 commit 81eb911

File tree

1 file changed

+34
-33
lines changed

1 file changed

+34
-33
lines changed

sub/subassconvert.c

+34 -33
Original file line number | Diff line number | Diff line change
@@ -106,25 +106,30 @@ static const struct {
106106

107107
#define SUBRIP_MAX_STACKED_FONT_TAGS 16
108108

109-
/* Read the attribute value starting at *s, and skip *s past the value.
110-
* Set out_value to the parsed value, with possible '"' stripped.
111-
* Return whether the attribute is well formed. */
112-
static bool read_value(char **s, struct bstr *out_value)
109+
/* Read the HTML-style attribute starting at *s, and skip *s past the value.
110+
* Set attr and val to the parsed attribute name and value.
111+
* Return 0 on success, or -1 if no valid attribute was found.
112+
*/
113+
static int read_attr(char **s, struct bstr *attr, struct bstr *val)
113114
{
114-
char term = 0;
115-
if (**s == '"') {
116-
term = '"';
117-
(*s)++;
118-
}
119-
out_value->start = *s;
120-
out_value->len = 0;
121-
unsigned char *start = *s;
122-
unsigned char *end = term ? strchr(start, term) : strpbrk(start, " >");
115+
char *eq = strchr(*s, '=');
116+
if (!eq)
117+
return -1;
118+
attr->start = *s;
119+
attr->len = eq - *s;
120+
for (int i = 0; i < attr->len; i++)
121+
if (!isalnum(attr->start[i]))
122+
return -1;
123+
val->start = eq + 1;
124+
bool quoted = val->start[0] == '"';
125+
if (quoted)
126+
val->start++;
127+
unsigned char *end = strpbrk(val->start, quoted ? "\"" : " >");
123128
if (!end)
124-
return false;
125-
out_value->len = end - out_value->start;
126-
*s = end + (term ? 1 : 0);
127-
return true;
129+
return -1;
130+
val->len = end - val->start;
131+
*s = end + quoted;
132+
return 0;
128133
}
129134

130135
void subassconvert_subrip(const char *orig, char *dest, int dest_buffer_size)
@@ -195,22 +200,21 @@ void subassconvert_subrip(const char *orig, char *dest, int dest_buffer_size)
195200
line += 6;
196201

197202
while (*line && *line != '>') {
198-
if (strncmp(line, "size=", 5) == 0) {
199-
line += 5;
200-
struct bstr val;
201-
if (!read_value(&line, &val))
202-
break;
203+
if (*line == ' ') {
204+
line++;
205+
continue;
206+
}
207+
struct bstr attr, val;
208+
if (read_attr(&line, &attr, &val) < 0)
209+
break;
210+
if (!bstrcmp0(attr, "size")) {
203211
tag->size = bstrtoll(val, &val, 10);
204212
if (val.len)
205213
break;
206214
append_text(&new_line, "{\\fs%d}", tag->size);
207215
tag->has_size = true;
208216
has_valid_attr = true;
209-
} else if (strncmp(line, "color=", 6) == 0) {
210-
line += 6;
211-
struct bstr val;
212-
if (!read_value(&line, &val))
213-
break;
217+
} else if (!bstrcmp0(attr, "color")) {
214218
if (bstr_eatstart(&val, bstr("#"))) {
215219
// #RRGGBB format
216220
tag->color = bstrtoll(val, &val, 16) & 0x00ffffff;
@@ -240,18 +244,15 @@ void subassconvert_subrip(const char *orig, char *dest, int dest_buffer_size)
240244
append_text(&new_line, "{\\c&H%06X&}", tag->color);
241245
tag->has_color = true;
242246
has_valid_attr = true;
243-
} else if (strncmp(line, "face=", 5) == 0) {
247+
} else if (!bstrcmp0(attr, "face")) {
244248
/* Font face attribute */
245-
line += 5;
246-
struct bstr val;
247-
if (!read_value(&line, &val))
248-
break;
249249
tag->face = val;
250250
append_text(&new_line, "{\\fn%.*s}", BSTR_P(tag->face));
251251
tag->has_face = true;
252252
has_valid_attr = true;
253253
} else
254-
line++;
254+
mp_tmsg(MSGT_SUBREADER, MSGL_WARN,"SubRip: unrecognized "
255+
"attribute \"%.*s\" in font tag\n", BSTR_P(attr));
255256
}
256257

257258
if (!has_valid_attr || *line != '>') { /* Not valid font tag */

0 commit comments

Comments
 (0)