Projects STRLCPY syft Commits f473bb75
🤬
  • 1577 spdxlicense generate (#1691)

    Update license_list.go to accept more permissive inputs for broader SPDX ID matching.
    Example:
    GPL3, gpl3, gpl-3, and GPL-3 can all map to GPL-3.0-only.
    
    By lowercasing all strings and removing the "-" characters, we're able to return a valid SPDX license ID for a greater diversity of input strings.
    ---------
    
    Signed-off-by: Christopher Phillips <[email protected]>
  • Loading...
  • f473bb75
    1 parent 539bc2af
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■ ■
    internal/spdxlicense/generate/generate_license_list.go
    skipped 10 lines
    11 11   "strings"
    12 12   "text/template"
    13 13   "time"
    14  - 
    15  - "github.com/scylladb/go-set/strset"
    16 14  )
    17 15   
    18 16  // This program generates license_list.go.
    skipped 16 lines
    35 33  }
    36 34  `))
    37 35   
    38  -var versionMatch = regexp.MustCompile(`-([0-9]+)\.?([0-9]+)?\.?([0-9]+)?\.?`)
     36 +var versionMatch = regexp.MustCompile(`([0-9]+)\.?([0-9]+)?\.?([0-9]+)?\.?`)
    39 37   
    40 38  func main() {
    41 39   if err := run(); err != nil {
    skipped 7 lines
    49 47   if err != nil {
    50 48   return fmt.Errorf("unable to get licenses list: %w", err)
    51 49   }
    52  - 
    53 50   var result LicenseList
    54 51   if err = json.NewDecoder(resp.Body).Decode(&result); err != nil {
    55 52   return fmt.Errorf("unable to decode license list: %w", err)
    skipped 47 lines
    103 100  // The third pass is for overwriting deprecated licenses with replacements, for example GPL-2.0+ is deprecated
    104 101  // and now maps to GPL-2.0-or-later.
    105 102  func processSPDXLicense(result LicenseList) map[string]string {
    106  - // first pass build map
    107  - var licenseIDs = make(map[string]string)
    108  - for _, l := range result.Licenses {
    109  - cleanID := strings.ToLower(l.ID)
    110  - if _, exists := licenseIDs[cleanID]; exists {
    111  - log.Fatalf("duplicate license ID found: %q", cleanID)
    112  - }
    113  - licenseIDs[cleanID] = l.ID
    114  - }
    115  - 
    116  - // The order of variations/permutations of a license ID matters because of we how shuffle its digits,
    117  - // that is because the permutation code can generate the same value for two difference licenses,
    118  - // for example: The licenses `ABC-1.0` and `ABC-1.1` can both map to `ABC-1`,
    119  - // so we need to guarantee the order they are created to avoid mapping them wrongly. So we use a sorted list.
    120  - // To overwrite deprecated licenses during the first pass we would later on rely on map order,
    121  - // [which in go is not consistent by design](https://stackoverflow.com/a/55925880).
     103 + // The order of variations/permutations of a license ID matters.
     104 + // The permutation code can generate the same value for two different licenses,
     105 + // for example: The licenses `ABC-1.0` and `ABC-1.1` can both map to `ABC1`,
     106 + // so we need to guarantee the order they are created in to avoid mapping them incorrectly.
     107 + // To do this we use a sorted list.
    122 108   sort.Slice(result.Licenses, func(i, j int) bool {
    123 109   return result.Licenses[i].ID < result.Licenses[j].ID
    124 110   })
    125 111   
    126  - // second pass to build exceptions and replacements
    127  - replaced := strset.New()
     112 + // keys are simplified by removing dashes and lowercasing ID
     113 + // this is so license declarations in the wild like: LGPL3 LGPL-3 lgpl3 and lgpl-3 can all match
     114 + licenseIDs := make(map[string]string)
    128 115   for _, l := range result.Licenses {
    129  - var multipleID []string
    130  - cleanID := strings.ToLower(l.ID)
     116 + // licensePerms includes the cleanID in return slice
     117 + cleanID := cleanLicenseID(l.ID)
     118 + licensePerms := buildLicenseIDPermutations(cleanID)
    131 119   
    132  - var replacement *License
     120 + // if license is deprecated, find its replacement and add to licenseIDs
    133 121   if l.Deprecated {
    134  - replacement = result.findReplacementLicense(l)
     122 + idToMap := l.ID
     123 + replacement := result.findReplacementLicense(l)
    135 124   if replacement != nil {
    136  - licenseIDs[cleanID] = replacement.ID
     125 + idToMap = replacement.ID
     126 + }
     127 + // it's important to use the original licensePerms here so that the deprecated license
     128 + // can now point to the new correct license
     129 + for _, id := range licensePerms {
     130 + if _, exists := licenseIDs[id]; exists {
     131 + // can be used to debug duplicate license permutations and confirm that examples like GPL1
     132 + // do not point to GPL-1.1
     133 + // log.Println("duplicate license list permutation found when mapping deprecated license to replacement")
     134 + // log.Printf("already have key: %q for SPDX ID: %q; attempted to map replacement ID: %q for deprecated ID: %q\n", id, value, replacement.ID, l.ID)
     135 + continue
     136 + }
     137 + licenseIDs[id] = idToMap
    137 138   }
    138 139   }
    139 140   
    140  - multipleID = append(multipleID, buildLicensePermutations(cleanID)...)
    141  - for _, id := range multipleID {
    142  - // don't make replacements for IDs that have already been replaced. Since we have a sorted license list
    143  - // the earliest replacement is correct (any future replacements are not.
    144  - // e.g. replace lgpl-2 with LGPL-2.1-only is wrong, but with LGPL-2.0-only is correct)
    145  - if replacement == nil || replaced.Has(id) {
    146  - if _, exists := licenseIDs[id]; !exists {
    147  - licenseIDs[id] = l.ID
    148  - }
    149  - } else {
    150  - // a useful debugging line during builds
    151  - log.Printf("replacing %s with %s\n", id, replacement.ID)
    152  - 
    153  - licenseIDs[id] = replacement.ID
    154  - replaced.Add(id)
     141 + // if license is not deprecated, add all permutations to licenseIDs
     142 + for _, id := range licensePerms {
     143 + if _, exists := licenseIDs[id]; exists {
     144 + // log.Println("found duplicate license permutation key for non deprecated license")
     145 + // log.Printf("already have key: %q for SPDX ID: %q; tried to insert as SPDX ID:%q\n", id, value, l.ID)
     146 + continue
    155 147   }
     148 + licenseIDs[id] = l.ID
    156 149   }
    157 150   }
    158 151   
    159 152   return licenseIDs
    160 153  }
    161 154   
     155 +func cleanLicenseID(id string) string {
     156 + cleanID := strings.ToLower(id)
     157 + return strings.ReplaceAll(cleanID, "-", "")
     158 +}
     159 + 
  • internal/spdxlicense/generate/generate_license_list_test.go
    Diff is too large to be displayed.
  • ■ ■ ■ ■ ■ ■
    internal/spdxlicense/generate/license.go
    skipped 19 lines
    20 20   SeeAlso []string `json:"seeAlso"`
    21 21  }
    22 22   
    23  -func (l License) canReplace(other License) bool {
     23 +// findReplacementLicense returns a replacement license for a deprecated license
     24 +func (ll LicenseList) findReplacementLicense(deprecated License) *License {
     25 + for _, l := range ll.Licenses {
     26 + if l.canReplace(deprecated) {
     27 + return &l
     28 + }
     29 + }
     30 + 
     31 + return nil
     32 +}
     33 + 
     34 +func (l License) canReplace(deprecated License) bool {
     35 + // don't replace a license with a deprecated license
    24 36   if l.Deprecated {
    25 37   return false
    26 38   }
    27 39   
    28 40   // We want to replace deprecated licenses with non-deprecated counterparts
    29 41   // For more information, see: https://github.com/spdx/license-list-XML/issues/1676
    30  - if other.Deprecated {
    31  - switch {
    32  - case strings.ReplaceAll(l.ID, "-only", "") == other.ID:
    33  - return true
    34  - case strings.ReplaceAll(l.ID, "-or-later", "+") == other.ID:
    35  - return true
    36  - case l.ID == "BSD-2-Clause" && other.ID == "BSD-2-Clause-NetBSD":
    37  - return true
    38  - case l.ID == "BSD-2-Clause-Views" && other.ID == "BSD-2-Clause-FreeBSD":
    39  - return true
    40  - case l.ID == "bzip2-1.0.6" && other.ID == "bzip2-1.0.5":
    41  - return true
    42  - case l.ID == "SMLNJ" && other.ID == "StandardML-NJ":
    43  - return true
    44  - }
     42 + switch {
     43 + case strings.ReplaceAll(l.ID, "-only", "") == deprecated.ID:
     44 + return true
     45 + case strings.ReplaceAll(l.ID, "-or-later", "+") == deprecated.ID:
     46 + return true
     47 + case l.ID == "BSD-2-Clause" && deprecated.ID == "BSD-2-Clause-NetBSD":
     48 + return true
     49 + case l.ID == "BSD-2-Clause-Views" && deprecated.ID == "BSD-2-Clause-FreeBSD":
     50 + return true
     51 + case l.ID == "bzip2-1.0.6" && deprecated.ID == "bzip2-1.0.5":
     52 + return true
     53 + case l.ID == "SMLNJ" && deprecated.ID == "StandardML-NJ":
     54 + return true
    45 55   }
    46 56   
    47  - if l.Name != other.Name {
     57 + if l.Name != deprecated.Name {
    48 58   return false
    49 59   }
    50 60   
    51  - if l.OSIApproved != other.OSIApproved {
     61 + if l.OSIApproved != deprecated.OSIApproved {
    52 62   return false
    53 63   }
    54 64   
    55  - if len(l.SeeAlso) != len(other.SeeAlso) {
     65 + if len(l.SeeAlso) != len(deprecated.SeeAlso) {
    56 66   return false
    57 67   }
    58 68   
    59 69   for i, sa := range l.SeeAlso {
    60  - if sa != other.SeeAlso[i] {
     70 + if sa != deprecated.SeeAlso[i] {
    61 71   return false
    62 72   }
    63 73   }
    64 74   
    65  - return l.ID == other.ID
    66  -}
    67  - 
    68  -func (ll LicenseList) findReplacementLicense(deprecated License) *License {
    69  - for _, l := range ll.Licenses {
    70  - if l.canReplace(deprecated) {
    71  - return &l
    72  - }
    73  - }
    74  - 
    75  - return nil
     75 + return l.ID == deprecated.ID
    76 76  }
    77 77   
    78  -func buildLicensePermutations(license string) (perms []string) {
    79  - lv := findLicenseVersion(license)
     78 +func buildLicenseIDPermutations(cleanID string) (perms []string) {
     79 + lv := findLicenseVersion(cleanID)
    80 80   vp := versionPermutations(lv)
    81 81   
     82 + permSet := strset.New()
    82 83   version := strings.Join(lv, ".")
    83 84   for _, p := range vp {
    84  - perms = append(perms, strings.Replace(license, version, p, 1))
     85 + permSet.Add(strings.Replace(cleanID, version, p, 1))
    85 86   }
    86 87   
    87  - return perms
     88 + permSet.Add(cleanID)
     89 + return permSet.List()
    88 90  }
    89 91   
    90 92  func findLicenseVersion(license string) (version []string) {
    skipped 34 lines
  • ■ ■ ■ ■ ■ ■
    internal/spdxlicense/generate/license_test.go
    skipped 66 lines
    67 67   {
    68 68   "GPL-1-only",
    69 69   []string{
    70  - "GPL-1-only",
    71  - "GPL-1.0-only",
    72  - "GPL-1.0.0-only",
     70 + "gpl1only",
     71 + "gpl1.0only",
     72 + "gpl1.0.0only",
    73 73   },
    74 74   },
    75 75   {
    76 76   "GPL-2",
    77 77   []string{
    78  - "GPL-2",
    79  - "GPL-2.0",
    80  - "GPL-2.0.0",
     78 + "gpl2",
     79 + "gpl2.0",
     80 + "gpl2.0.0",
    81 81   },
    82 82   },
    83 83   {
    84 84   "GPL-2.0+",
    85 85   []string{
    86  - "GPL-2+",
    87  - "GPL-2.0+",
    88  - "GPL-2.0.0+",
     86 + "gpl2+",
     87 + "gpl2.0+",
     88 + "gpl2.0.0+",
    89 89   },
    90 90   },
    91 91   {
    92 92   "GPL-3.0.0-or-later",
    93 93   []string{
    94  - "GPL-3-or-later",
    95  - "GPL-3.0-or-later",
    96  - "GPL-3.0.0-or-later",
     94 + "gpl3orlater",
     95 + "gpl3.0orlater",
     96 + "gpl3.0.0orlater",
    97 97   },
    98 98   },
    99 99   {
    100 100   "abc-1.1",
    101 101   []string{
    102  - "abc-1",
    103  - "abc-1.1",
    104  - "abc-1.1.0",
     102 + "abc1",
     103 + "abc1.1",
     104 + "abc1.1.0",
    105 105   },
    106 106   },
    107 107   {
    108 108   "oldap-2.0",
    109 109   []string{
    110  - "oldap-2",
    111  - "oldap-2.0",
    112  - "oldap-2.0.0",
     110 + "oldap2",
     111 + "oldap2.0",
     112 + "oldap2.0.0",
    113 113   },
    114 114   },
    115 115   }
    116 116   
    117 117   for _, test := range tests {
    118 118   t.Run(test.shortName, func(t *testing.T) {
    119  - perms := buildLicensePermutations(test.shortName)
     119 + cleanID := cleanLicenseID(test.shortName)
     120 + perms := buildLicenseIDPermutations(cleanID)
    120 121   assert.ElementsMatch(t, test.permutations, perms)
    121 122   })
    122 123   }
    skipped 59 lines
    182 183   {
    183 184   "GPL-2",
    184 185   []string{"2"},
    185  - },
    186  - {
    187  - "bzip2-1",
    188  - []string{"1"},
    189 186   },
    190 187   {
    191 188   "php-3.01",
    skipped 33 lines
  • ■ ■ ■ ■ ■ ■
    internal/spdxlicense/license.go
    skipped 17 lines
    18 18   
    19 19  //go:generate go run ./generate
    20 20   
    21  -func ID(id string) (value, other string, exists bool) {
    22  - id = strings.TrimSpace(id)
    23  - // ignore blank strings or the joiner
    24  - if id == "" || id == "AND" {
    25  - return "", "", false
    26  - }
     21 +// ID returns the canonical license ID for the given license ID
     22 +// Note: this function is only concerned with returning a best match of an SPDX license ID
     23 +// SPDX Expressions will be handled by a parent package which will call this function
     24 +func ID(id string) (value string, exists bool) {
    27 25   // first look for a canonical license
    28  - if value, exists := licenseIDs[strings.ToLower(id)]; exists {
    29  - return value, "", exists
     26 + if value, exists := licenseIDs[cleanLicenseID(id)]; exists {
     27 + return value, exists
    30 28   }
    31 29   // we did not find, so treat it as a separate license
    32  - return "", id, true
     30 + return "", false
     31 +}
     32 + 
     33 +func cleanLicenseID(id string) string {
     34 + id = strings.TrimSpace(id)
     35 + id = strings.ToLower(id)
     36 + return strings.ReplaceAll(id, "-", "")
    33 37  }
    34 38   
  • internal/spdxlicense/license_list.go
    Diff is too large to be displayed.
  • ■ ■ ■ ■ ■ ■
    internal/spdxlicense/license_list_test.go
    skipped 8 lines
    9 9  func TestLicenceListIDs(t *testing.T) {
    10 10   // do a sanity check on the generated data
    11 11   assert.Equal(t, "0BSD", licenseIDs["0bsd"])
    12  - assert.Equal(t, "ZPL-2.1", licenseIDs["zpl-2.1"])
    13  - assert.Equal(t, "GPL-2.0-only", licenseIDs["gpl-2"])
    14  - assert.Equal(t, "GPL-2.0-or-later", licenseIDs["gpl-2+"])
     12 + assert.Equal(t, "ZPL-2.1", licenseIDs["zpl2.1"])
     13 + assert.Equal(t, "GPL-2.0-only", licenseIDs["gpl2"])
     14 + assert.Equal(t, "GPL-2.0-or-later", licenseIDs["gpl2+"])
    15 15   assert.NotEmpty(t, Version)
    16 16  }
    17 17   
  • ■ ■ ■ ■ ■ ■
    internal/spdxlicense/license_test.go
    skipped 5 lines
    6 6   "github.com/stretchr/testify/assert"
    7 7  )
    8 8   
    9  -func TestIDParse(t *testing.T) {
     9 +func TestSPDXIDRecognition(t *testing.T) {
    10 10   var tests = []struct {
    11 11   shortName string
    12 12   id string
    13  - other string
    14 13   found bool
    15 14   }{
    16 15   {
    17 16   "GPL-1-only",
    18 17   "GPL-1.0-only",
    19  - "",
     18 + true,
     19 + },
     20 + {
     21 + "gpl1",
     22 + "GPL-1.0-only",
     23 + true,
     24 + },
     25 + {
     26 + "gpl-1",
     27 + "GPL-1.0-only",
    20 28   true,
    21 29   },
    22 30   {
    23 31   "GPL-2",
    24 32   "GPL-2.0-only",
    25  - "",
    26 33   true,
    27 34   },
    28 35   {
    29 36   "GPL-2+",
    30 37   "GPL-2.0-or-later",
    31  - "",
    32 38   true,
    33 39   },
    34 40   {
    35 41   "GPL-3.0.0-or-later",
    36 42   "GPL-3.0-or-later",
    37  - "",
    38 43   true,
    39 44   },
    40 45   {
    41 46   "GPL-3-with-autoconf-exception",
    42 47   "GPL-3.0-with-autoconf-exception",
    43  - "",
    44 48   true,
    45 49   },
    46 50   {
    47 51   "CC-by-nc-3-de",
    48 52   "CC-BY-NC-3.0-DE",
    49  - "",
    50 53   true,
    51 54   },
    52 55   // the below few cases are NOT expected, however, seem unavoidable given the current approach
    53 56   {
    54  - "w3c-20150513.0.0",
    55  - "W3C-20150513",
    56  - "",
    57  - true,
    58  - },
    59  - {
    60 57   "spencer-86.0.0",
    61 58   "Spencer-86",
    62  - "",
    63 59   true,
    64 60   },
    65 61   {
    66 62   "unicode-dfs-2015.0.0",
    67 63   "Unicode-DFS-2015",
    68  - "",
    69 64   true,
    70 65   },
    71 66   {
    72 67   "Unknown",
    73 68   "",
    74  - "Unknown",
    75  - true,
    76  - },
    77  - {
    78  - " ",
    79  - "",
    80  - "",
    81 69   false,
    82 70   },
    83 71   {
    84  - "AND",
    85  - "",
     72 + " ",
    86 73   "",
    87 74   false,
    88 75   },
    skipped 1 lines
    90 77   
    91 78   for _, test := range tests {
    92 79   t.Run(test.shortName, func(t *testing.T) {
    93  - value, other, exists := ID(test.shortName)
     80 + value, exists := ID(test.shortName)
    94 81   assert.Equal(t, test.found, exists)
    95 82   assert.Equal(t, test.id, value)
    96  - assert.Equal(t, test.other, other)
    97 83   })
    98 84   }
    99 85  }
    skipped 1 lines
  • ■ ■ ■ ■ ■
    syft/formats/common/cyclonedxhelpers/licenses.go
    skipped 9 lines
    10 10  func encodeLicenses(p pkg.Package) *cyclonedx.Licenses {
    11 11   lc := cyclonedx.Licenses{}
    12 12   for _, licenseName := range p.Licenses {
    13  - if value, other, exists := spdxlicense.ID(licenseName); exists {
     13 + if value, exists := spdxlicense.ID(licenseName); exists {
    14 14   lc = append(lc, cyclonedx.LicenseChoice{
    15 15   License: &cyclonedx.License{
    16  - ID: value,
    17  - Name: other,
     16 + ID: value,
    18 17   },
    19 18   })
     19 + continue
    20 20   }
     21 + 
     22 + // not found so append the licenseName as is
     23 + lc = append(lc, cyclonedx.LicenseChoice{
     24 + License: &cyclonedx.License{
     25 + Name: licenseName,
     26 + },
     27 + })
    21 28   }
    22 29   if len(lc) > 0 {
    23 30   return &lc
    skipped 24 lines
  • ■ ■ ■ ■ ■ ■
    syft/formats/common/spdxhelpers/license.go
    skipped 38 lines
    39 39   
    40 40  func parseLicenses(raw []string) (parsedLicenses []string) {
    41 41   for _, l := range raw {
    42  - if value, other, exists := spdxlicense.ID(l); exists {
    43  - parsed := value
    44  - if other != "" {
    45  - parsed = spdxlicense.LicenseRefPrefix + other
    46  - }
    47  - parsedLicenses = append(parsedLicenses, parsed)
     42 + if value, exists := spdxlicense.ID(l); exists {
     43 + parsedLicenses = append(parsedLicenses, value)
     44 + } else {
     45 + // we did not find a valid SPDX license ID so treat as separate license
     46 + otherLicense := spdxlicense.LicenseRefPrefix + l
     47 + parsedLicenses = append(parsedLicenses, otherLicense)
    48 48   }
    49 49   }
    50 50   return
    skipped 2 lines
Please wait...
Page is in error, reload to recover