Projects STRLCPY syft Commits b81c9805
🤬
  • Allow scanning unpacked container filesystems (#1485)

    * source: avoid second-step of symlink resolution in directory resolver
    
    We can use the already existing file tree to perform symlink resolution
    for FilesByPath, instead of traversing the symlinks again.
    
    This moves all of the symlink logic into the indexing code, and then we
    can rely on syft's resolution algorithm over the index in this part of
    the codebase.
    
    Signed-off-by: Justin Chadwell <[email protected]>
    
    * source: add base parameter to directory resolver
    
    The new base parameter is an optional parameter for the directory
    resolver that resolves all symlinks relative to this root. There are two
    intended use cases:
    
    - base = "/". The previous behavior, symlinks are resolved relative to
    the root filesystem.
    - base = path. Symlinks are resolved relative to the target filesystem,
    allowing correct behavior when scanning unpacked container filesystems
    on disk.
    
    Signed-off-by: Justin Chadwell <[email protected]>
    
    * source: add tests for new base parameter
    
    Signed-off-by: Justin Chadwell <[email protected]>
    
    ---------
    
    Signed-off-by: Justin Chadwell <[email protected]>
  • Loading...
  • Justin Chadwell committed with GitHub 1 year ago
    b81c9805
    1 parent ba559631
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■ ■
    syft/source/directory_resolver.go
    skipped 36 lines
    37 37  // directoryResolver implements path and content access for the directory data source.
    38 38  type directoryResolver struct {
    39 39   path string
     40 + base string
    40 41   currentWdRelativeToRoot string
    41 42   currentWd string
    42 43   fileTree *filetree.FileTree
    skipped 4 lines
    47 48   errPaths map[string]error
    48 49  }
    49 50   
    50  -func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryResolver, error) {
     51 +func newDirectoryResolver(root string, base string, pathFilters ...pathFilterFn) (*directoryResolver, error) {
    51 52   currentWD, err := os.Getwd()
    52 53   if err != nil {
    53 54   return nil, fmt.Errorf("could not get CWD: %w", err)
    skipped 10 lines
    64 65   return nil, fmt.Errorf("could not evaluate root=%q symlinks: %w", root, err)
    65 66   }
    66 67   
     68 + cleanBase := ""
     69 + if base != "" {
     70 + cleanBase, err = filepath.EvalSymlinks(base)
     71 + if err != nil {
     72 + return nil, fmt.Errorf("could not evaluate base=%q symlinks: %w", base, err)
     73 + }
     74 + cleanBase, err = filepath.Abs(cleanBase)
     75 + if err != nil {
     76 + return nil, err
     77 + }
     78 + }
     79 + 
    67 80   var currentWdRelRoot string
    68 81   if path.IsAbs(cleanRoot) {
    69 82   currentWdRelRoot, err = filepath.Rel(cleanCWD, cleanRoot)
    skipped 6 lines
    76 89   
    77 90   resolver := directoryResolver{
    78 91   path: cleanRoot,
     92 + base: cleanBase,
    79 93   currentWd: cleanCWD,
    80 94   currentWdRelativeToRoot: currentWdRelRoot,
    81 95   fileTree: filetree.NewFileTree(),
    skipped 162 lines
    244 258   return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err)
    245 259   }
    246 260   
    247  - // note: if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to resolve it relative to the directory
    248  - // in question (e.g. resolve to /dev/fd/2)
    249  - if !filepath.IsAbs(linkTarget) {
    250  - linkTarget = filepath.Join(filepath.Dir(p), linkTarget)
     261 + if filepath.IsAbs(linkTarget) {
     262 + // if the link is absolute (e.g, /bin/ls -> /bin/busybox) we need to
     263 + // resolve relative to the root of the base directory
     264 + linkTarget = filepath.Join(r.base, filepath.Clean(linkTarget))
     265 + } else {
     266 + // if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to
     267 + // resolve it relative to the directory in question (e.g. resolve to
     268 + // /dev/fd/2)
     269 + if r.base == "" {
     270 + linkTarget = filepath.Join(filepath.Dir(p), linkTarget)
     271 + } else {
     272 + // if the base is set, then we first need to resolve the link,
     273 + // before finding its location in the base
     274 + dir, err := filepath.Rel(r.base, filepath.Dir(p))
     275 + if err != nil {
     276 + return "", fmt.Errorf("unable to resolve relative path for path=%q: %w", p, err)
     277 + }
     278 + linkTarget = filepath.Join(r.base, filepath.Clean(filepath.Join("/", dir, linkTarget)))
     279 + }
    251 280   }
    252 281   
    253 282   ref, err := r.fileTree.AddSymLink(file.Path(p), file.Path(linkTarget))
    skipped 82 lines
    336 365   }
    337 366   
    338 367   // we should be resolving symlinks and preserving this information as a VirtualPath to the real file
    339  - evaluatedPath, err := filepath.EvalSymlinks(userStrPath)
     368 + exists, ref, err := r.fileTree.File(file.Path(userStrPath), filetree.FollowBasenameLinks)
    340 369   if err != nil {
    341 370   log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err)
    342 371   continue
    343 372   }
     373 + if !exists {
     374 + continue
     375 + }
    344 376   
    345 377   // TODO: why not use stored metadata?
    346  - fileMeta, err := os.Stat(evaluatedPath)
     378 + fileMeta, err := os.Stat(string(ref.RealPath))
    347 379   if errors.Is(err, os.ErrNotExist) {
    348 380   // note: there are other kinds of errors other than os.ErrNotExist that may be given that is platform
    349 381   // specific, but essentially hints at the same overall problem (that the path does not exist). Such an
    skipped 4 lines
    354 386   // invalid paths. This logging statement is meant to raise IO or permissions related problems.
    355 387   var pathErr *os.PathError
    356 388   if !errors.As(err, &pathErr) {
    357  - log.Warnf("path is not valid (%s): %+v", evaluatedPath, err)
     389 + log.Warnf("path is not valid (%s): %+v", ref.RealPath, err)
    358 390   }
    359 391   continue
    360 392   }
    skipped 7 lines
    368 400   userStrPath = windowsToPosix(userStrPath)
    369 401   }
    370 402   
    371  - exists, ref, err := r.fileTree.File(file.Path(userStrPath), filetree.FollowBasenameLinks)
    372  - if err == nil && exists {
    373  - loc := NewVirtualLocationFromDirectory(
    374  - r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root
    375  - r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root
    376  - *ref,
    377  - )
    378  - references = append(references, loc)
    379  - }
     403 + loc := NewVirtualLocationFromDirectory(
     404 + r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root
     405 + r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root
     406 + *ref,
     407 + )
     408 + references = append(references, loc)
    380 409   }
    381 410   
    382 411   return references, nil
    skipped 204 lines
  • ■ ■ ■ ■ ■
    syft/source/directory_resolver_test.go
    skipped 56 lines
    57 57   }
    58 58   for _, c := range cases {
    59 59   t.Run(c.name, func(t *testing.T) {
    60  - resolver, err := newDirectoryResolver(c.relativeRoot)
     60 + resolver, err := newDirectoryResolver(c.relativeRoot, "")
    61 61   assert.NoError(t, err)
    62 62   
    63 63   refs, err := resolver.FilesByPath(c.input)
    skipped 47 lines
    111 111   absRoot, err := filepath.Abs(c.relativeRoot)
    112 112   require.NoError(t, err)
    113 113   
    114  - resolver, err := newDirectoryResolver(absRoot)
     114 + resolver, err := newDirectoryResolver(absRoot, "")
    115 115   assert.NoError(t, err)
    116 116   
    117 117   refs, err := resolver.FilesByPath(c.input)
    skipped 54 lines
    172 172   }
    173 173   for _, c := range cases {
    174 174   t.Run(c.name, func(t *testing.T) {
    175  - resolver, err := newDirectoryResolver(c.root)
     175 + resolver, err := newDirectoryResolver(c.root, "")
    176 176   assert.NoError(t, err)
    177 177   
    178 178   hasPath := resolver.HasPath(c.input)
    skipped 41 lines
    220 220   }
    221 221   for _, c := range cases {
    222 222   t.Run(c.name, func(t *testing.T) {
    223  - resolver, err := newDirectoryResolver("./test-fixtures")
     223 + resolver, err := newDirectoryResolver("./test-fixtures", "")
    224 224   assert.NoError(t, err)
    225 225   refs, err := resolver.FilesByPath(c.input...)
    226 226   assert.NoError(t, err)
    skipped 6 lines
    233 233  }
    234 234   
    235 235  func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) {
    236  - resolver, err := newDirectoryResolver("./test-fixtures")
     236 + resolver, err := newDirectoryResolver("./test-fixtures", "")
    237 237   assert.NoError(t, err)
    238 238   refs, err := resolver.FilesByGlob("**/image-symlinks/file*")
    239 239   assert.NoError(t, err)
    skipped 2 lines
    242 242  }
    243 243   
    244 244  func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) {
    245  - resolver, err := newDirectoryResolver("./test-fixtures/image-symlinks")
     245 + resolver, err := newDirectoryResolver("./test-fixtures/image-symlinks", "")
    246 246   assert.NoError(t, err)
    247 247   refs, err := resolver.FilesByGlob("**/*.txt")
    248 248   assert.NoError(t, err)
    skipped 1 lines
    250 250  }
    251 251   
    252 252  func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) {
    253  - resolver, err := newDirectoryResolver("./test-fixtures")
     253 + resolver, err := newDirectoryResolver("./test-fixtures", "")
    254 254   assert.NoError(t, err)
    255 255   refs, err := resolver.FilesByGlob("**/image-symlinks/*1.txt")
    256 256   assert.NoError(t, err)
    skipped 20 lines
    277 277   
    278 278   for _, test := range tests {
    279 279   t.Run(test.name, func(t *testing.T) {
    280  - resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple")
     280 + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", "")
    281 281   assert.NoError(t, err)
    282 282   
    283 283   refs, err := resolver.FilesByPath(test.fixture)
    skipped 16 lines
    300 300   
    301 301  func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) {
    302 302   // let's make certain that "dev/place" is not ignored, since it is not "/dev/place"
    303  - resolver, err := newDirectoryResolver("test-fixtures/system_paths/target")
     303 + resolver, err := newDirectoryResolver("test-fixtures/system_paths/target", "")
    304 304   assert.NoError(t, err)
    305 305   // ensure the correct filter function is wired up by default
    306 306   expectedFn := reflect.ValueOf(isUnallowableFileType)
    skipped 124 lines
    431 431   
    432 432  func Test_directoryResolver_index(t *testing.T) {
    433 433   // note: this test is testing the effects from newDirectoryResolver, indexTree, and addPathToIndex
    434  - r, err := newDirectoryResolver("test-fixtures/system_paths/target")
     434 + r, err := newDirectoryResolver("test-fixtures/system_paths/target", "")
    435 435   if err != nil {
    436 436   t.Fatalf("unable to get indexed dir resolver: %+v", err)
    437 437   }
    skipped 170 lines
    608 608   }
    609 609   for _, test := range tests {
    610 610   t.Run(test.fixturePath, func(t *testing.T) {
    611  - resolver, err := newDirectoryResolver(test.fixturePath)
     611 + resolver, err := newDirectoryResolver(test.fixturePath, "")
    612 612   assert.NoError(t, err)
    613 613   locations, err := resolver.FilesByMIMEType(test.mimeType)
    614 614   assert.NoError(t, err)
    skipped 6 lines
    621 621  }
    622 622   
    623 623  func Test_IndexingNestedSymLinks(t *testing.T) {
    624  - resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple")
     624 + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", "")
    625 625   require.NoError(t, err)
    626 626   
    627 627   // check that we can get the real path
    skipped 46 lines
    674 674   return strings.HasSuffix(path, string(filepath.Separator)+"readme")
    675 675   }
    676 676   
    677  - resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", filterFn)
     677 + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", "", filterFn)
    678 678   require.NoError(t, err)
    679 679   
    680 680   // the path to the real file is PRUNED from the index, so we should NOT expect a location returned
    skipped 13 lines
    694 694  }
    695 695   
    696 696  func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) {
    697  - resolver, err := newDirectoryResolver("./test-fixtures/symlinks-multiple-roots/root")
     697 + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-multiple-roots/root", "")
    698 698   require.NoError(t, err)
    699 699   
    700 700   // check that we can get the real path
    skipped 11 lines
    712 712  }
    713 713   
    714 714  func Test_RootViaSymlink(t *testing.T) {
    715  - resolver, err := newDirectoryResolver("./test-fixtures/symlinked-root/nested/link-root")
     715 + resolver, err := newDirectoryResolver("./test-fixtures/symlinked-root/nested/link-root", "")
    716 716   require.NoError(t, err)
    717 717   
    718 718   locations, err := resolver.FilesByPath("./file1.txt")
    skipped 34 lines
    753 753   }
    754 754   for _, test := range tests {
    755 755   t.Run(test.name, func(t *testing.T) {
    756  - r, err := newDirectoryResolver(".")
     756 + r, err := newDirectoryResolver(".", "")
    757 757   require.NoError(t, err)
    758 758   
    759 759   actual, err := r.FileContentsByLocation(test.location)
    skipped 59 lines
    819 819   
    820 820  func Test_SymlinkLoopWithGlobsShouldResolve(t *testing.T) {
    821 821   test := func(t *testing.T) {
    822  - resolver, err := newDirectoryResolver("./test-fixtures/symlinks-loop")
     822 + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-loop", "")
    823 823   require.NoError(t, err)
    824 824   
    825 825   locations, err := resolver.FilesByGlob("**/file.target")
    skipped 27 lines
    853 853   return path != "/"
    854 854   }
    855 855   
    856  - resolver, err := newDirectoryResolver("/", filterFn)
     856 + resolver, err := newDirectoryResolver("/", "", filterFn)
    857 857   require.NoError(t, err)
    858 858   
    859 859   exists, ref, err := resolver.fileTree.File(file.Path("/"))
    skipped 10 lines
    870 870   tempFile, err := os.CreateTemp("", "")
    871 871   require.NoError(t, err)
    872 872   
    873  - resolver, err := newDirectoryResolver(tempFile.Name())
     873 + resolver, err := newDirectoryResolver(tempFile.Name(), "")
    874 874   require.NoError(t, err)
    875 875   
    876 876   t.Run("filtering path with nil os.FileInfo", func(t *testing.T) {
    skipped 9 lines
    886 886   })
    887 887  }
    888 888   
     889 +func TestDirectoryResolver_FilesByPath_baseRoot(t *testing.T) {
     890 + cases := []struct {
     891 + name string
     892 + root string
     893 + input string
     894 + expected []string
     895 + }{
     896 + {
     897 + name: "should find the base file",
     898 + root: "./test-fixtures/symlinks-base/",
     899 + input: "./base",
     900 + expected: []string{
     901 + "base",
     902 + },
     903 + },
     904 + {
     905 + name: "should follow a link with a pivoted root",
     906 + root: "./test-fixtures/symlinks-base/",
     907 + input: "./foo",
     908 + expected: []string{
     909 + "base",
     910 + },
     911 + },
     912 + {
     913 + name: "should follow a relative link with extra parents",
     914 + root: "./test-fixtures/symlinks-base/",
     915 + input: "./bar",
     916 + expected: []string{
     917 + "base",
     918 + },
     919 + },
     920 + {
     921 + name: "should follow an absolute link with extra parents",
     922 + root: "./test-fixtures/symlinks-base/",
     923 + input: "./baz",
     924 + expected: []string{
     925 + "base",
     926 + },
     927 + },
     928 + {
     929 + name: "should follow an absolute link with extra parents",
     930 + root: "./test-fixtures/symlinks-base/",
     931 + input: "./sub/link",
     932 + expected: []string{
     933 + "sub/item",
     934 + },
     935 + },
     936 + {
     937 + name: "should follow chained pivoted link",
     938 + root: "./test-fixtures/symlinks-base/",
     939 + input: "./chain",
     940 + expected: []string{
     941 + "base",
     942 + },
     943 + },
     944 + }
     945 + for _, c := range cases {
     946 + t.Run(c.name, func(t *testing.T) {
     947 + resolver, err := newDirectoryResolver(c.root, c.root)
     948 + assert.NoError(t, err)
     949 + 
     950 + refs, err := resolver.FilesByPath(c.input)
     951 + require.NoError(t, err)
     952 + assert.Len(t, refs, len(c.expected))
     953 + s := strset.New()
     954 + for _, actual := range refs {
     955 + s.Add(actual.RealPath)
     956 + }
     957 + assert.ElementsMatch(t, c.expected, s.List())
     958 + })
     959 + }
     960 +}
     961 + 
  • ■ ■ ■ ■ ■
    syft/source/metadata.go
    skipped 5 lines
    6 6   Scheme Scheme // the source data scheme type (directory or image)
    7 7   ImageMetadata ImageMetadata // all image info (image only)
    8 8   Path string // the root path to be cataloged (directory only)
     9 + Base string // the base path that symlinks are resolved relative to (directory only)
    9 10   Name string
    10 11  }
    11 12   
  • ■ ■ ■ ■ ■ ■
    syft/source/source.go
    skipped 31 lines
    32 32   Metadata Metadata
    33 33   directoryResolver *directoryResolver `hash:"ignore"`
    34 34   path string
     35 + base string
    35 36   mutex *sync.Mutex
    36 37   Exclusions []string `hash:"ignore"`
    37 38  }
    skipped 214 lines
    252 253   return NewFromDirectoryWithName(path, "")
    253 254  }
    254 255   
     256 +// NewFromDirectoryRoot creates a new source object tailored to catalog a given filesystem directory recursively, using the directory itself as the base for symlink resolution.
     257 +func NewFromDirectoryRoot(path string) (Source, error) {
     258 + return NewFromDirectoryRootWithName(path, "")
     259 +}
     260 + 
    255 261  // NewFromDirectoryWithName creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name.
    256 262  func NewFromDirectoryWithName(path string, name string) (Source, error) {
    257 263   s := Source{
    skipped 4 lines
    262 268   Path: path,
    263 269   },
    264 270   path: path,
     271 + }
     272 + s.SetID()
     273 + return s, nil
     274 +}
     275 + 
     276 +// NewFromDirectoryRootWithName creates a new source object tailored to catalog a given filesystem directory recursively, using the directory itself as the base for symlink resolution, with an explicitly provided name.
     277 +func NewFromDirectoryRootWithName(path string, name string) (Source, error) {
     278 + s := Source{
     279 + mutex: &sync.Mutex{},
     280 + Metadata: Metadata{
     281 + Name: name,
     282 + Scheme: DirectoryScheme,
     283 + Path: path,
     284 + Base: path,
     285 + },
     286 + path: path,
     287 + base: path,
    265 288   }
    266 289   s.SetID()
    267 290   return s, nil
    skipped 160 lines
    428 451   if err != nil {
    429 452   return nil, err
    430 453   }
    431  - resolver, err := newDirectoryResolver(s.path, exclusionFunctions...)
     454 + resolver, err := newDirectoryResolver(s.path, s.base, exclusionFunctions...)
    432 455   if err != nil {
    433 456   return nil, fmt.Errorf("unable to create directory resolver: %w", err)
    434 457   }
    skipped 114 lines
  • ■ ■ ■ ■
    syft/source/source_test.go
    skipped 120 lines
    121 121   Path: "test-fixtures/image-simple",
    122 122   },
    123 123   },
    124  - expected: artifact.ID("14b60020c4f9955"),
     124 + expected: artifact.ID("1b0dc351e6577b01"),
    125 125   },
    126 126   }
    127 127   
    skipped 699 lines
Please wait...
Page is in error, reload to recover