■ ■ ■ ■ ■ ■
internal/spdxlicense/generate/generate_license_list.go
skipped 10 lines 11 11 "strings" 12 12 "text/template" 13 13 "time" 14 - 15 - "github.com/scylladb/go-set/strset" 16 14 ) 17 15 18 16 // This program generates license_list.go. skipped 16 lines 35 33 } 36 34 `)) 37 35 38 - var versionMatch = regexp.MustCompile(`- ([0-9]+)\.?([0-9]+)?\.?([0-9]+)?\.?`) 36 + var versionMatch = regexp.MustCompile(`([0-9]+)\.?([0-9]+)?\.?([0-9]+)?\.?`) 39 37 40 38 func main() { 41 39 if err := run(); err != nil { skipped 7 lines 49 47 if err != nil { 50 48 return fmt.Errorf("unable to get licenses list: %w", err) 51 49 } 52 - 53 50 var result LicenseList 54 51 if err = json.NewDecoder(resp.Body).Decode(&result); err != nil { 55 52 return fmt.Errorf("unable to decode license list: %w", err) skipped 47 lines 103 100 // The third pass is for overwriting deprecated licenses with replacements, for example GPL-2.0+ is deprecated 104 101 // and now maps to GPL-2.0-or-later. 105 102 func processSPDXLicense(result LicenseList) map[string]string { 106 - // first pass build map 107 - var licenseIDs = make(map[string]string) 108 - for _, l := range result.Licenses { 109 - cleanID := strings.ToLower(l.ID) 110 - if _, exists := licenseIDs[cleanID]; exists { 111 - log.Fatalf("duplicate license ID found: %q", cleanID) 112 - } 113 - licenseIDs[cleanID] = l.ID 114 - } 115 - 116 - // The order of variations/permutations of a license ID matters because of we how shuffle its digits, 117 - // that is because the permutation code can generate the same value for two difference licenses, 118 - // for example: The licenses `ABC-1.0` and `ABC-1.1` can both map to `ABC-1`, 119 - // so we need to guarantee the order they are created to avoid mapping them wrongly. So we use a sorted list. 120 - // To overwrite deprecated licenses during the first pass we would later on rely on map order, 121 - // [which in go is not consistent by design](https://stackoverflow.com/a/55925880). 103 + // The order of variations/permutations of a license ID matters. 
104 + // The permutation code can generate the same value for two different licenses, 105 + // for example: The licenses `ABC-1.0` and `ABC-1.1` can both map to `ABC1`, 106 + // we need to guarantee the order they are created to avoid mapping them incorrectly. 107 + // To do this we use a sorted list. 122 108 sort.Slice(result.Licenses, func(i, j int) bool { 123 109 return result.Licenses[i].ID < result.Licenses[j].ID 124 110 }) 125 111 126 - // second pass to build exceptions and replacements 127 - replaced := strset.New() 112 + // keys are simplified by removing dashes and lowercasing ID 113 + // this is so license declarations in the wild like: LGPL3 LGPL-3 lgpl3 and lgpl-3 can all match 114 + licenseIDs := make(map[string]string) 128 115 for _, l := range result.Licenses { 129 - var multipleID []string 130 - cleanID := strings . ToLower (l.ID) 116 + // licensePerms includes the cleanID in return slice 117 + cleanID := cleanLicenseID (l.ID) 118 + licensePerms := buildLicenseIDPermutations(cleanID) 131 119 132 - var replacement *License 120 + // if license is deprecated, find its replacement and add to licenseIDs 133 121 if l.Deprecated { 134 - replacement = result.findReplacementLicense(l) 122 + idToMap := l.ID 123 + replacement := result.findReplacementLicense(l) 135 124 if replacement != nil { 136 - licenseIDs [ cleanID ] = replacement.ID 125 + idToMap = replacement.ID 126 + } 127 + // it's important to use the original licensePerms here so that the deprecated license 128 + // can now point to the new correct license 129 + for _, id := range licensePerms { 130 + if _, exists := licenseIDs[id]; exists { 131 + // can be used to debug duplicate license permutations and confirm that examples like GPL1 132 + // do not point to GPL-1.1 133 + // log.Println("duplicate license list permutation found when mapping deprecated license to replacement") 134 + // log.Printf("already have key: %q for SPDX ID: %q; attempted to map replacement ID: %q for deprecated ID: %q\n", 
id, value, replacement.ID, l.ID) 135 + continue 136 + } 137 + licenseIDs[id] = idToMap 137 138 } 138 139 } 139 140 140 - multipleID = append(multipleID, buildLicensePermutations(cleanID)...) 141 - for _, id := range multipleID { 142 - // don't make replacements for IDs that have already been replaced. Since we have a sorted license list 143 - // the earliest replacement is correct (any future replacements are not. 144 - // e.g. replace lgpl-2 with LGPL-2.1-only is wrong, but with LGPL-2.0-only is correct) 145 - if replacement == nil || replaced.Has(id) { 146 - if _, exists := licenseIDs[id]; !exists { 147 - licenseIDs[id] = l.ID 148 - } 149 - } else { 150 - // a useful debugging line during builds 151 - log.Printf("replacing %s with %s\n", id, replacement.ID) 152 - 153 - licenseIDs[id] = replacement.ID 154 - replaced.Add(id) 141 + // add every permutation not already mapped; for a deprecated license all permutations were handled above, so only non-deprecated licenses insert here 142 + for _, id := range licensePerms { 143 + if _, exists := licenseIDs[id]; exists { 144 + // log.Println("found duplicate license permutation key for non deprecated license") 145 + // log.Printf("already have key: %q for SPDX ID: %q; tried to insert as SPDX ID:%q\n", id, value, l.ID) 146 + continue 155 147 } 148 + licenseIDs[id] = l.ID 156 149 } 157 150 } 158 151 159 152 return licenseIDs 160 153 } 161 154 155 + func cleanLicenseID(id string) string { 156 + cleanID := strings.ToLower(id) 157 + return strings.ReplaceAll(cleanID, "-", "") 158 + } 159 +