Skip to content

Commit b8bf692

Browse files
authored
Parse: If transfer syntax is missing, attempt to infer it by peeking next 100 bytes. (#330)
This PR attempts to infer a missing transfer syntax in DICOMs during Parse. Specifically:
* When the transfer syntax is missing from the DICOM metadata, attempt to infer the correct transfer syntax by peeking at the next 100 bytes and trying to read an element without an error. This isn't foolproof, but it is one option to start with.
* This also makes test updates to support testfiles/ that may not have PixelData.
* This also introduces a write option to write DICOMs without transfer syntax elements, in order to write some "roundtrip" unit tests for this behavior on Parse.

I was able to successfully test using some test data from #327, but I need to do some more investigation to see if we can safely add those to our test files (licensing and otherwise).

Things to consider in the future:
* Try deflated little endian explicit as well.
* Peek more/less than the initial 100 bytes, or move away from a fixed peek.
1 parent 0b4bb9f commit b8bf692

File tree

4 files changed

+173
-42
lines changed

4 files changed

+173
-42
lines changed

parse.go

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@ package dicom
2222

2323
import (
2424
"bufio"
25+
"bytes"
2526
"encoding/binary"
2627
"errors"
28+
"fmt"
2729
"io"
2830
"os"
2931

@@ -170,14 +172,64 @@ func NewParser(in io.Reader, bytesToRead int64, frameChannel chan *frame.Frame,
170172
if tsStr == uid.DeflatedExplicitVRLittleEndian {
171173
p.reader.rawReader.SetDeflate()
172174
}
173-
} else {
174-
// No transfer syntax found, warn the user we're proceeding with the
175-
// default Little Endian implicit.
176-
debug.Log("WARN: could not find transfer syntax uid in metadata, proceeding with little endian implicit")
175+
p.SetTransferSyntax(bo, implicit)
176+
return &p, nil
177177
}
178-
p.SetTransferSyntax(bo, implicit)
179178

180-
return &p, nil
179+
// No transfer syntax found, so let's try to infer the transfer syntax by
180+
// trying to read the next element under various transfer syntaxes.
181+
next100, err := p.reader.rawReader.Peek(100)
182+
if errors.Is(err, io.EOF) {
183+
// DICOM is shorter than 100 bytes.
184+
return nil, fmt.Errorf("dicom with missing transfer syntax metadata is shorter than 100 bytes, so cannot infer transfer syntax")
185+
}
186+
187+
syntaxes := []struct {
188+
name string
189+
bo binary.ByteOrder
190+
implicit bool
191+
}{
192+
{
193+
name: "Little Endian Implicit",
194+
bo: binary.LittleEndian,
195+
implicit: true,
196+
},
197+
{
198+
name: "Big Endian Explicit",
199+
bo: binary.BigEndian,
200+
implicit: false,
201+
},
202+
{
203+
name: "Little Endian Explicit",
204+
bo: binary.LittleEndian,
205+
implicit: false,
206+
},
207+
}
208+
209+
for _, syntax := range syntaxes {
210+
if canReadElementFromBytes(next100, optSet, syntax.bo, syntax.implicit) {
211+
debug.Logf("WARN: could not find transfer syntax uid in metadata, proceeding with %v", syntax.name)
212+
p.SetTransferSyntax(syntax.bo, syntax.implicit)
213+
return &p, nil
214+
}
215+
}
216+
// TODO(https://github.com/suyashkumar/dicom/issues/329): consider trying
217+
// deflated parsing as a fallback as well.
218+
return &p, errors.New("dicom missing transfer syntax uid in metadata, and it was not possible to successfully infer it using the next 100 bytes of the dicom")
219+
}
220+
221+
func canReadElementFromBytes(buf []byte, optSet parseOptSet, bo binary.ByteOrder, implicit bool) bool {
222+
next100Reader := bytes.NewReader(buf)
223+
subR := &reader{
224+
rawReader: dicomio.NewReader(bufio.NewReader(next100Reader), bo, int64(len(buf))),
225+
opts: optSet,
226+
}
227+
subR.rawReader.SetTransferSyntax(bo, implicit)
228+
_, err := subR.readElement(nil, nil)
229+
if err == nil {
230+
return true
231+
}
232+
return false
181233
}
182234

183235
// Next parses and returns the next top-level element from the DICOM this Parser points to.

parse_internal_test.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
11
package dicom
22

33
import (
4+
"bytes"
5+
"errors"
46
"os"
57
"strings"
68
"testing"
9+
10+
"github.com/suyashkumar/dicom/pkg/tag"
711
)
812

13+
// parse_internal_test.go holds tests that must exist in the dicom package (as
14+
// opposed to dicom_test), in order to access internal entities.
15+
916
// TestParseUntilEOFConformsToParse runs both the dicom.ParseUntilEOF and the dicom.Parse APIs against each
1017
// testdata file and ensures the outputs are the same.
1118
// This test lives in parse_internal_test.go because this test cannot live in the dicom_test package, due
@@ -48,6 +55,58 @@ func TestParseUntilEOFConformsToParse(t *testing.T) {
4855
}
4956
}
5057

58+
func TestParse_InfersMissingTransferSyntax(t *testing.T) {
59+
dsWithMissingTS := Dataset{Elements: []*Element{
60+
mustNewElement(tag.MediaStorageSOPClassUID, []string{"1.2.840.10008.5.1.4.1.1.1.2"}),
61+
mustNewElement(tag.MediaStorageSOPInstanceUID, []string{"1.2.3.4.5.6.7"}),
62+
mustNewElement(tag.PatientName, []string{"Bob", "Jones"}),
63+
mustNewElement(tag.Rows, []int{128}),
64+
mustNewElement(tag.FloatingPointValue, []float64{128.10}),
65+
mustNewElement(tag.DimensionIndexPointer, []int{32, 36950}),
66+
mustNewElement(tag.RedPaletteColorLookupTableData, make([]byte, 200)),
67+
}}
68+
69+
cases := []struct {
70+
name string
71+
overrideTransferSyntax string
72+
}{
73+
{
74+
name: "Little Endian Implicit",
75+
overrideTransferSyntax: "1.2.840.10008.1.2",
76+
},
77+
{
78+
name: "Little Endian Explicit",
79+
overrideTransferSyntax: "1.2.840.10008.1.2.1",
80+
},
81+
{
82+
name: "Big Endian Explicit",
83+
overrideTransferSyntax: "1.2.840.10008.1.2.2",
84+
},
85+
}
86+
87+
for _, tc := range cases {
88+
t.Run(tc.name, func(t *testing.T) {
89+
// Write out Dataset with OverrideMissingTransferSyntax option _and_
90+
// the skipWritingTransferSyntaxForTests to ensure no Transfer Syntax
91+
// element is written to the test dicom. The test later verifies
92+
// that no Transfer Syntax element was written to the metadata.
93+
writtenDICOM := &bytes.Buffer{}
94+
if err := Write(writtenDICOM, dsWithMissingTS, OverrideMissingTransferSyntax(tc.overrideTransferSyntax), skipWritingTransferSyntaxForTests()); err != nil {
95+
t.Errorf("Write(OverrideMissingTransferSyntax(%v)) returned unexpected error: %v", tc.overrideTransferSyntax, err)
96+
}
97+
98+
parsedDS, err := ParseUntilEOF(writtenDICOM, nil)
99+
if err != nil {
100+
t.Fatalf("ParseUntilEOF returned unexpected error when reading written dataset back in: %v", err)
101+
}
102+
_, err = parsedDS.FindElementByTag(tag.TransferSyntaxUID)
103+
if !errors.Is(err, ErrorElementNotFound) {
104+
t.Fatalf("expected test dicom dataset to be missing explicit TransferSyntaxUID tag, but found one. got: %v, want: ErrorElementNotFound", err)
105+
}
106+
})
107+
}
108+
}
109+
51110
func readTestdataFile(t *testing.T, name string) *os.File {
52111
dcm, err := os.Open("./testdata/" + name)
53112
if err != nil {

parse_test.go

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -100,37 +100,43 @@ func TestParseFile_SkipPixelData(t *testing.T) {
100100
runForEveryTestFile(t, func(t *testing.T, filename string) {
101101
dataset, err := dicom.ParseFile(filename, nil, dicom.SkipPixelData())
102102
if err != nil {
103-
t.Errorf("Unexpected error parsing dataset: %v", dataset)
103+
t.Errorf("Unexpected error parsing dataset: %v, dataset: %v", err, dataset)
104104
}
105+
// If PixelData present in this DICOM, check if it's populated
106+
// correctly. The current test assumption is that if PixelData is
107+
// missing, it was not originally in the dicom (which we should
108+
// consider revisiting).
105109
el, err := dataset.FindElementByTag(tag.PixelData)
106-
if err != nil {
107-
t.Errorf("Unexpected error when finding PixelData in Dataset: %v", err)
108-
}
109-
pixelData := dicom.MustGetPixelDataInfo(el.Value)
110-
if !pixelData.IntentionallySkipped {
111-
t.Errorf("Expected pixelData.IntentionallySkipped=true, got false")
112-
}
113-
if got := len(pixelData.Frames); got != 0 {
114-
t.Errorf("unexpected frames length. got: %v, want: %v", got, 0)
110+
if err == nil {
111+
pixelData := dicom.MustGetPixelDataInfo(el.Value)
112+
if !pixelData.IntentionallySkipped {
113+
t.Errorf("Expected pixelData.IntentionallySkipped=true, got false")
114+
}
115+
if got := len(pixelData.Frames); got != 0 {
116+
t.Errorf("unexpected frames length. got: %v, want: %v", got, 0)
117+
}
115118
}
116119
})
117120
})
118121
t.Run("WithNOSkipPixelData", func(t *testing.T) {
119122
runForEveryTestFile(t, func(t *testing.T, filename string) {
120123
dataset, err := dicom.ParseFile(filename, nil)
121124
if err != nil {
122-
t.Errorf("Unexpected error parsing dataset: %v", dataset)
125+
t.Errorf("Unexpected error parsing dataset: %v, dataset: %v", err, dataset)
123126
}
127+
// If PixelData present in this DICOM, check if it's populated
128+
// correctly. The current test assumption is that if PixelData is
129+
// missing, it was not originally in the dicom (which we should
130+
// consider revisiting).
124131
el, err := dataset.FindElementByTag(tag.PixelData)
125-
if err != nil {
126-
t.Errorf("Unexpected error when finding PixelData in Dataset: %v", err)
127-
}
128-
pixelData := dicom.MustGetPixelDataInfo(el.Value)
129-
if pixelData.IntentionallySkipped {
130-
t.Errorf("Expected pixelData.IntentionallySkipped=false when SkipPixelData option not present, got true")
131-
}
132-
if len(pixelData.Frames) == 0 {
133-
t.Errorf("unexpected frames length when SkipPixelData=false. got: %v, want: >0", len(pixelData.Frames))
132+
if err == nil {
133+
pixelData := dicom.MustGetPixelDataInfo(el.Value)
134+
if pixelData.IntentionallySkipped {
135+
t.Errorf("Expected pixelData.IntentionallySkipped=false when SkipPixelData option not present, got true")
136+
}
137+
if len(pixelData.Frames) == 0 {
138+
t.Errorf("unexpected frames length when SkipPixelData=false. got: %v, want: >0", len(pixelData.Frames))
139+
}
134140
}
135141
})
136142
})

write.go

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,24 @@ func OverrideMissingTransferSyntax(transferSyntaxUID string) WriteOption {
156156
}
157157
}
158158

159+
// skipWritingTransferSyntaxForTests is a test WriteOption that cause Write to skip
160+
// writing the transfer syntax uid element in the DICOM metadata. When used in
161+
// combination with OverrideMissingTransferSyntax, this can be used to set the
162+
// TransferSyntax for the written dataset without writing the actual transfer
163+
// syntax element to the metadata.
164+
func skipWritingTransferSyntaxForTests() WriteOption {
165+
return func(set *writeOptSet) {
166+
set.skipWritingTransferSyntaxForTests = true
167+
}
168+
}
169+
159170
// writeOptSet represents the flattened option set after all WriteOptions have been applied.
type writeOptSet struct {
	// NOTE(review): presumably disables VR verification while writing
	// elements; its use is not visible in this diff, so confirm.
	skipVRVerification bool
	// NOTE(review): presumably disables value type verification while writing
	// elements; its use is not visible in this diff, so confirm.
	skipValueTypeVerification bool
	// defaultMissingTransferSyntax makes writeFileHeader write the Implicit VR
	// Little Endian transfer syntax uid when the dataset has no
	// TransferSyntaxUID element.
	defaultMissingTransferSyntax bool
	// overrideMissingTransferSyntaxUID, when non-empty, is the transfer syntax
	// uid writeFileHeader writes when the dataset has no TransferSyntaxUID
	// element.
	overrideMissingTransferSyntaxUID string
	// skipWritingTransferSyntaxForTests makes writeFileHeader skip writing the
	// TransferSyntaxUID metadata element entirely. It is set only by the
	// unexported skipWritingTransferSyntaxForTests option.
	skipWritingTransferSyntaxForTests bool
}
166178

167179
func (w *writeOptSet) validate() error {
@@ -203,21 +215,23 @@ func writeFileHeader(w *dicomio.Writer, ds *Dataset, metaElems []*Element, opts
203215
return err
204216
}
205217

206-
err = writeMetaElem(subWriter, tag.TransferSyntaxUID, ds, &tagsUsed, opts)
218+
if !opts.skipWritingTransferSyntaxForTests {
219+
err = writeMetaElem(subWriter, tag.TransferSyntaxUID, ds, &tagsUsed, opts)
207220

208-
if errors.Is(err, ErrorElementNotFound) && opts.defaultMissingTransferSyntax {
209-
// Write the default transfer syntax
210-
if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{uid.ImplicitVRLittleEndian}), opts); err != nil {
211-
return err
212-
}
213-
} else if errors.Is(err, ErrorElementNotFound) && opts.overrideMissingTransferSyntaxUID != "" {
214-
// Write the override transfer syntax
215-
if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{opts.overrideMissingTransferSyntaxUID}), opts); err != nil {
221+
if errors.Is(err, ErrorElementNotFound) && opts.defaultMissingTransferSyntax {
222+
// Write the default transfer syntax
223+
if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{uid.ImplicitVRLittleEndian}), opts); err != nil {
224+
return err
225+
}
226+
} else if errors.Is(err, ErrorElementNotFound) && opts.overrideMissingTransferSyntaxUID != "" {
227+
// Write the override transfer syntax
228+
if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{opts.overrideMissingTransferSyntaxUID}), opts); err != nil {
229+
return err
230+
}
231+
} else if err != nil {
232+
// Return the error if none of the above conditions/overrides apply.
216233
return err
217234
}
218-
} else if err != nil {
219-
// Return the error if none of the above conditions/overrides apply.
220-
return err
221235
}
222236

223237
for _, elem := range metaElems {

0 commit comments

Comments
 (0)