diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 1edc1b3e6..a006af8ef 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,7 +1,8 @@ -Public Suffix List (PSL) Pull Request (PR) Template -==== +# Public Suffix List (PSL) Submission -Each PSL PR needs to have a description, rationale, indication of DNS validation and syntax checking, as well as a number of acknowledgements from the submitter. This template must be included with each PR, and the submitting party MUST provide responses to all of the elements in order to be considered. + + ### Checklist of required steps * [ ] Description of Organization * [ ] Robust Reason for PSL Inclusion * [ ] DNS verification via dig -* [ ] Run Syntax Checker (make test) +* [ ] Run Syntax Checker (`make test`) -* [ ] Each domain listed in the PRIVATE section has and shall maintain at least two years remaining on registration, and we shall keep the \_PSL txt record in place in the respective zone(s) in the affected section +* [ ] Each domain listed in the PRIVATE section has and shall maintain at least two years remaining on registration, and we shall keep the `_psl` TXT record in place in the respective zone(s). __Submitter affirms the following:__ - * [ ] We are listing *any* third-party limits that we seek to work around in our rationale such as those between IOS 14.5+ and Facebook (see [Issue #1245](https://github.com/publicsuffix/list/issues/1245) as a well-documented example) - - [Cloudflare](https://developers.cloudflare.com/learning-paths/get-started/add-domain-to-cf/add-site/) - - [Letsencrypt](https://letsencrypt.org/docs/rate-limits/) - - + + * [ ] We are listing *any* third-party limits that we seek to work around in our rationale such as those between IOS 14.5+ and Facebook (see [Issue #1245](https://github.com/publicsuffix/list/issues/1245) as a well-documented example) + - [Cloudflare](https://developers.cloudflare.com/learning-paths/get-started/add-domain-to-cf/add-site/) + - [Let's Encrypt](https://letsencrypt.org/docs/rate-limits/) + - MAKE SURE UPDATE THE FOLLOWING LIST WITH YOUR LIMITATIONS! REMOVE ENTRIES WHICH DO NOT APPLY AS WELL AS REMOVING THIS LINE! - * [ ] This request was _not_ submitted with the objective of working around other third-party limits + * [ ] This request was _not_ submitted with the objective of working around other third-party limits. - * [ ] The submitter acknowledges that it is their responsibility to maintain the domains within their section. This includes removing names which are no longer used, retaining the _psl DNS entry, responding to e-mails to the supplied address. Failure to maintain entries may result in removal of individual entries or the entire section. + * [ ] The submitter acknowledges that it is their responsibility to maintain the domains within their section. This includes removing names which are no longer used, retaining the _psl DNS entry, and responding to e-mails to the supplied address. Failure to maintain entries may result in removal of individual entries or the entire section. - * [ ] The [Guidelines](https://github.com/publicsuffix/list/wiki/Guidelines) were carefully _read_ and _understood_, and this request conforms - * [ ] The submission follows the [guidelines](https://github.com/publicsuffix/list/wiki/Format) on formatting and sorting + * [ ] The [Guidelines](https://github.com/publicsuffix/list/wiki/Guidelines) were carefully _read_ and _understood_, and this request conforms to them. + * [ ] The submission follows the [guidelines](https://github.com/publicsuffix/list/wiki/Format) on formatting and sorting. --- -For Private section requests that are submitting entries for domains that match their organization website's primary domain, please understand that this can have impacts that may not match the desired outcome and take a long time to rollback, if at all. +For PRIVATE section requests that are submitting entries for domains that match their organization website's primary domain, please understand that this can have impacts that may not match the desired outcome and take a long time to rollback, if at all. To ensure that requested changes are entirely intentional, make sure that you read the affectation and propagation expectations, that you understand them, and confirm this understanding. @@ -108,9 +111,9 @@ guidelines. Your request could very likely alter the cookie and certificate (as well as other) behaviours on your core domain name in ways that could be problematic for your business. -Rollback is really not predictable, as those who use or incorporate +Rollbacks are really not predictable, as those who use or incorporate the PSL do what they do, and when. It is not within the PSL volunteers' -control to do anything about that. +control to do anything about that. The volunteers are busy with new requests, and rollbacks are lowest priority, so if something gets broken by your PR, it will potentially @@ -119,32 +122,27 @@ stay that way for an indefinite period of time (typically long). (Link: [about propagation/expectations](https://github.com/publicsuffix/list/wiki/Guidelines#appropriate-expectations-on-derivative-propagation-use-or-inclusion)) - * [ ] *Yes, I understand*. I could break my organization's website cookies etc. and the rollback timing, etc is acceptable. *Proceed*. + * [ ] *Yes, I understand*. I could break my organization's website cookies and cause other issues, and the rollback timing is acceptable. *Proceed anyways*. --- -Description of Organization -==== - +## Description of Organization -Organization Website: - - -Reason for PSL Inclusion -==== +**Organization Website:** + +## Reason for PSL Inclusion -Number of users this request is being made to serve: - - - -DNS Verification via dig -======= +**Number of users this request is being made to serve:** + +## DNS Verification -Results of Syntax Checker (`make test`) -========= - +## Results of Syntax Checker (`make test`) - - diff --git a/.github/workflows/pr_fmt_check.yml b/.github/workflows/psltool_check.yml similarity index 77% rename from .github/workflows/pr_fmt_check.yml rename to .github/workflows/psltool_check.yml index 83272a427..dbbd4d818 100644 --- a/.github/workflows/pr_fmt_check.yml +++ b/.github/workflows/psltool_check.yml @@ -1,7 +1,8 @@ -name: pr-psltool-fmt +name: psltool on: pull_request: + workflow_dispatch: permissions: {} @@ -18,3 +19,4 @@ jobs: run: | cd tools go run ./psltool fmt -d ../public_suffix_list.dat + go run ./psltool validate ../public_suffix_list.dat diff --git a/public_suffix_list.dat b/public_suffix_list.dat index ea83acc07..eaaf05f47 100644 --- a/public_suffix_list.dat +++ b/public_suffix_list.dat @@ -495,6 +495,7 @@ ato.br b.br barueri.br belem.br +bet.br bhz.br bib.br bio.br @@ -582,6 +583,7 @@ joinville.br jor.br jus.br leg.br +leilao.br lel.br log.br londrina.br @@ -6725,7 +6727,7 @@ org.zw // newGTLDs -// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2024-08-25T15:14:38Z +// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2024-09-26T15:17:07Z // This list is auto-generated, don't edit it manually. // aaa : American Automobile Association, Inc. // https://www.iana.org/domains/root/db/aaa.html @@ -7671,10 +7673,6 @@ cymru // https://www.iana.org/domains/root/db/cyou.html cyou -// dabur : Dabur India Limited -// https://www.iana.org/domains/root/db/dabur.html -dabur - // dad : Charleston Road Registry Inc. // https://www.iana.org/domains/root/db/dad.html dad @@ -9119,6 +9117,10 @@ men // https://www.iana.org/domains/root/db/menu.html menu +// merck : Merck Registry Holdings, Inc. +// https://www.iana.org/domains/root/db/merck.html +merck + // merckmsd : MSD Registry Holdings, Inc. // https://www.iana.org/domains/root/db/merckmsd.html merckmsd @@ -11328,7 +11330,7 @@ myamaze.net // Amazon API Gateway // Submitted by AWS Security -// Reference: 9e37648f-a66c-4655-9ab1-5981f8737197 +// Reference: 6a4f5a95-8c7d-4077-a7af-9cf1abec0a53 execute-api.cn-north-1.amazonaws.com.cn execute-api.cn-northwest-1.amazonaws.com.cn execute-api.af-south-1.amazonaws.com @@ -11342,6 +11344,7 @@ execute-api.ap-southeast-1.amazonaws.com execute-api.ap-southeast-2.amazonaws.com execute-api.ap-southeast-3.amazonaws.com execute-api.ap-southeast-4.amazonaws.com +execute-api.ap-southeast-5.amazonaws.com execute-api.ca-central-1.amazonaws.com execute-api.ca-west-1.amazonaws.com execute-api.eu-central-1.amazonaws.com @@ -11554,7 +11557,7 @@ emrstudio-prod.us-west-2.amazonaws.com // Amazon S3 // Submitted by AWS Security -// Reference: cd5c8b3a-67b7-4b40-9236-c87ce81a3d10 +// Reference: ada5c9df-55e1-4195-a1ce-732d6c81e357 s3.dualstack.cn-north-1.amazonaws.com.cn s3-accesspoint.dualstack.cn-north-1.amazonaws.com.cn s3-website.dualstack.cn-north-1.amazonaws.com.cn @@ -11612,6 +11615,7 @@ s3-object-lambda.ap-south-1.amazonaws.com s3-website.ap-south-1.amazonaws.com s3.dualstack.ap-south-2.amazonaws.com s3-accesspoint.dualstack.ap-south-2.amazonaws.com +s3-website.dualstack.ap-south-2.amazonaws.com s3.ap-south-2.amazonaws.com s3-accesspoint.ap-south-2.amazonaws.com s3-object-lambda.ap-south-2.amazonaws.com @@ -11632,16 +11636,26 @@ s3-object-lambda.ap-southeast-2.amazonaws.com s3-website.ap-southeast-2.amazonaws.com s3.dualstack.ap-southeast-3.amazonaws.com s3-accesspoint.dualstack.ap-southeast-3.amazonaws.com +s3-website.dualstack.ap-southeast-3.amazonaws.com s3.ap-southeast-3.amazonaws.com s3-accesspoint.ap-southeast-3.amazonaws.com s3-object-lambda.ap-southeast-3.amazonaws.com s3-website.ap-southeast-3.amazonaws.com s3.dualstack.ap-southeast-4.amazonaws.com s3-accesspoint.dualstack.ap-southeast-4.amazonaws.com +s3-website.dualstack.ap-southeast-4.amazonaws.com s3.ap-southeast-4.amazonaws.com s3-accesspoint.ap-southeast-4.amazonaws.com s3-object-lambda.ap-southeast-4.amazonaws.com s3-website.ap-southeast-4.amazonaws.com +s3.dualstack.ap-southeast-5.amazonaws.com +s3-accesspoint.dualstack.ap-southeast-5.amazonaws.com +s3-website.dualstack.ap-southeast-5.amazonaws.com +s3.ap-southeast-5.amazonaws.com +s3-accesspoint.ap-southeast-5.amazonaws.com +s3-deprecated.ap-southeast-5.amazonaws.com +s3-object-lambda.ap-southeast-5.amazonaws.com +s3-website.ap-southeast-5.amazonaws.com s3.dualstack.ca-central-1.amazonaws.com s3-accesspoint.dualstack.ca-central-1.amazonaws.com s3-accesspoint-fips.dualstack.ca-central-1.amazonaws.com @@ -11662,6 +11676,7 @@ s3.ca-west-1.amazonaws.com s3-accesspoint.ca-west-1.amazonaws.com s3-accesspoint-fips.ca-west-1.amazonaws.com s3-fips.ca-west-1.amazonaws.com +s3-object-lambda.ca-west-1.amazonaws.com s3-website.ca-west-1.amazonaws.com s3.dualstack.eu-central-1.amazonaws.com s3-accesspoint.dualstack.eu-central-1.amazonaws.com @@ -11672,6 +11687,7 @@ s3-object-lambda.eu-central-1.amazonaws.com s3-website.eu-central-1.amazonaws.com s3.dualstack.eu-central-2.amazonaws.com s3-accesspoint.dualstack.eu-central-2.amazonaws.com +s3-website.dualstack.eu-central-2.amazonaws.com s3.eu-central-2.amazonaws.com s3-accesspoint.eu-central-2.amazonaws.com s3-object-lambda.eu-central-2.amazonaws.com @@ -11691,6 +11707,7 @@ s3-object-lambda.eu-south-1.amazonaws.com s3-website.eu-south-1.amazonaws.com s3.dualstack.eu-south-2.amazonaws.com s3-accesspoint.dualstack.eu-south-2.amazonaws.com +s3-website.dualstack.eu-south-2.amazonaws.com s3.eu-south-2.amazonaws.com s3-accesspoint.eu-south-2.amazonaws.com s3-object-lambda.eu-south-2.amazonaws.com @@ -11718,12 +11735,14 @@ s3-object-lambda.eu-west-3.amazonaws.com s3-website.eu-west-3.amazonaws.com s3.dualstack.il-central-1.amazonaws.com s3-accesspoint.dualstack.il-central-1.amazonaws.com +s3-website.dualstack.il-central-1.amazonaws.com s3.il-central-1.amazonaws.com s3-accesspoint.il-central-1.amazonaws.com s3-object-lambda.il-central-1.amazonaws.com s3-website.il-central-1.amazonaws.com s3.dualstack.me-central-1.amazonaws.com s3-accesspoint.dualstack.me-central-1.amazonaws.com +s3-website.dualstack.me-central-1.amazonaws.com s3.me-central-1.amazonaws.com s3-accesspoint.me-central-1.amazonaws.com s3-object-lambda.me-central-1.amazonaws.com @@ -11792,6 +11811,7 @@ s3.dualstack.us-east-2.amazonaws.com s3-accesspoint.dualstack.us-east-2.amazonaws.com s3-accesspoint-fips.dualstack.us-east-2.amazonaws.com s3-fips.dualstack.us-east-2.amazonaws.com +s3-website.dualstack.us-east-2.amazonaws.com s3.us-east-2.amazonaws.com s3-accesspoint.us-east-2.amazonaws.com s3-accesspoint-fips.us-east-2.amazonaws.com @@ -12127,6 +12147,10 @@ on-aptible.com // Submitted by Aki Ueno f5.si +// ArvanCloud EdgeCompute +// Submitted by ArvanCloud CDN +arvanedge.ir + // ASEINet : https://www.aseinet.com/ // Submitted by Asei SEKIGUCHI user.aseinet.ne.jp @@ -12286,10 +12310,6 @@ vm.bytemark.co.uk // Submitted by Antonio Lain cafjs.com -// callidomus : https://www.callidomus.com/ -// Submitted by Marcus Popp -mycd.eu - // Canva Pty Ltd : https://canva.com/ // Submitted by Joel Aquilina canva-apps.cn @@ -12342,10 +12362,6 @@ uk.net ae.org com.se -// certmgr.org : https://certmgr.org -// Submitted by B. Blechschmidt -certmgr.org - // Cityhost LLC : https://cityhost.ua // Submitted by Maksym Rivtin cx.ua @@ -12473,6 +12489,10 @@ co.no webhosting.be hosting-cluster.nl +// Contentful GmbH : https://www.contentful.com +// Submitted by Contentful Developer Experience Team +ctfcloud.net + // Convex : https://convex.dev/ // Submitted by James Cowling convex.site @@ -12541,11 +12561,6 @@ firm.dk reg.dk store.dk -// Daplie, Inc : https://daplie.com -// Submitted by AJ ONeal -daplie.me -localhost.daplie.me - // dappnode.io : https://dappnode.io/ // Submitted by Abel Boldu / DAppNode Team dyndns.dappnode.io @@ -13099,7 +13114,6 @@ no.eu.org nz.eu.org pl.eu.org pt.eu.org -q-a.eu.org ro.eu.org ru.eu.org se.eu.org @@ -13611,7 +13625,6 @@ blogspot.lt blogspot.lu blogspot.md blogspot.mk -blogspot.mr blogspot.com.mt blogspot.mx blogspot.my @@ -13684,6 +13697,14 @@ häkkinen.fi hs.run hs.zone +// Harrison Network : https://hrsn.net +// Submitted by William Harrison +wdh.app +preview.wdh.app +hrsn.dev +t.hrsn.dev +t.hrsn.net + // Hashbang : https://hashbang.sh hashbang.sh @@ -13753,18 +13774,17 @@ hoplix.shop orx.biz biz.gl biz.ng +co.biz.ng +dl.biz.ng +go.biz.ng +lg.biz.ng +on.biz.ng col.ng firm.ng gen.ng ltd.ng ngo.ng plc.ng -// Reserved Third Level Subdomains for BIZ.NG -co.biz.ng -dl.biz.ng -go.biz.ng -lg.biz.ng -on.biz.ng // HostFly : https://www.ie.ua // Submitted by Bohdan Dub @@ -13773,6 +13793,11 @@ ie.ua // HostyHosting : https://hostyhosting.com hostyhosting.io +// Hugging Face: https://huggingface.co +// Submitted by Eliott Coyac +hf.space +static.hf.space + // Hypernode B.V. : https://www.hypernode.com/ // Submitted by Cipriano Groenendal hypernode.io @@ -13883,6 +13908,10 @@ app-ionos.space // Submitted by Roman Azarenko iopsys.se +// IPFS Project : https://ipfs.tech/ +// Submitted by Interplanetary Shipyard +*.dweb.link + // IPiFony Systems, Inc. : https://www.ipifony.com/ // Submitted by Matthew Hardeman ipifony.net @@ -13912,7 +13941,6 @@ iserv.dev // Submitted by Ihor Kolodyuk mel.cloudlets.com.au cloud.interhostsolutions.be -mycloud.by alp1.ae.flow.ch appengine.flow.ch es-1.axarnet.cloud @@ -13934,7 +13962,6 @@ us.reclaim.cloud ch.trendhosting.cloud de.trendhosting.cloud jele.club -amscompute.com dopaas.com paas.hosted-by-previder.com rag-cloud.hosteur.com @@ -13942,10 +13969,8 @@ rag-cloud-ch.hosteur.com jcloud.ik-server.com jcloud-ver-jpc.ik-server.com demo.jelastic.com -kilatiron.com paas.massivegrid.com jed.wafaicloud.com -lon.wafaicloud.com ryd.wafaicloud.com j.scaleforce.com.cy jelastic.dogado.eu @@ -13957,18 +13982,14 @@ mircloud.host paas.beebyte.io sekd1.beebyteapp.io jele.io -cloud-fr1.unispace.io jc.neen.it -cloud.jelastic.open.tim.it jcloud.kz -upaas.kazteleport.kz cloudjiffy.net fra1-de.cloudjiffy.net west1-us.cloudjiffy.net jls-sto1.elastx.net jls-sto2.elastx.net jls-sto3.elastx.net -faststacks.net fr-1.paas.massivegrid.net lon-1.paas.massivegrid.net lon-2.paas.massivegrid.net @@ -13978,11 +13999,9 @@ sg-1.paas.massivegrid.net jelastic.saveincloud.net nordeste-idc.saveincloud.net j.scaleforce.net -jelastic.tsukaeru.net sdscloud.pl unicloud.pl mircloud.ru -jelastic.regruhosting.ru enscaled.sg jele.site jelastic.team @@ -14022,10 +14041,6 @@ js.org kaas.gg khplay.nl -// Kakao : https://www.kakaocorp.com/ -// Submitted by JaeYoong Lee -ktistory.com - // Kapsi : https://kapsi.fi // Submitted by Tomi Juntunen kapsi.fi @@ -14078,7 +14093,7 @@ lpusercontent.com lelux.site // libp2p project : https://libp2p.io -// Submitted by Interplanetary Shipyard +// Submitted by Interplanetary Shipyard libp2p.direct // Libre IT Ltd : https://libre.nz @@ -14243,12 +14258,9 @@ atmeta.com apps.fbsbx.com // MetaCentrum, CESNET z.s.p.o. : https://www.metacentrum.cz/en/ -// Submitted by Zdeněk Šustr +// Submitted by Zdeněk Šustr and Radim Janča *.cloud.metacentrum.cz custom.metacentrum.cz - -// MetaCentrum, CESNET z.s.p.o. : https://www.metacentrum.cz/en/ -// Submitted by Radim Janča flt.cloud.muni.cz usr.cloud.muni.cz @@ -14299,9 +14311,14 @@ routingthecloud.org // Submitted by Robert Böttinger csx.cc -// MobileEducation, LLC : https://joinforte.com -// Submitted by Grayson Martin -forte.id +// Mittwald CM Service GmbH & Co. KG : https://mittwald.de +// Submitted by Marco Rieger +mydbserver.com +webspaceconfig.de +mittwald.info +mittwaldserver.info +typo3server.info +project.space // MODX Systems LLC : https://modx.com // Submitted by Elizabeth Southwell @@ -14509,7 +14526,6 @@ dnsking.ch mypi.co n4t.co 001www.com -ddnslive.com myiphost.com forumz.info soundcast.me @@ -14525,8 +14541,6 @@ x443.pw now-dns.top ntdll.top freeddns.us -crafting.xyz -zapto.xyz // nsupdate.info : https://www.nsupdate.info/ // Submitted by Thomas Waldmann @@ -14538,7 +14552,7 @@ nerdpol.ovh nyc.mn // O3O.Foundation : https://o3o.foundation/ -// Submitted by the prvcy.page Registry Team +// Submitted by the prvcy.page Registry Team prvcy.page // Obl.ong : @@ -14728,10 +14742,6 @@ platterp.us // Submitted by Henning Pohl pley.games -// Port53 : https://port53.io/ -// Submitted by Maximilian Schieder -dyn53.io - // Porter : https://porter.run/ // Submitted by Rudraksh MK onporter.run @@ -14759,10 +14769,6 @@ xen.prgmr.com // Submitted by registry priv.at -// Protocol Labs : https://protocol.ai/ -// Submitted by Michael Burns -*.dweb.link - // Protonet GmbH : http://protonet.io // Submitted by Martin Meier protonet.io @@ -15148,6 +15154,7 @@ shopitsite.com // shopware AG : https://shopware.com // Submitted by Jens Küper +shopware.shop shopware.store // Siemens Mobility GmbH @@ -15166,12 +15173,6 @@ vipsinaapp.com // Submitted by Skylar Challand siteleaf.net -// Skyhat : http://www.skyhat.io -// Submitted by Shante Adam -bounty-full.com -alpha.bounty-full.com -beta.bounty-full.com - // Small Technology Foundation : https://small-tech.org // Submitted by Aral Balkan small-web.org @@ -15346,11 +15347,6 @@ supabase.co supabase.in supabase.net -// Symfony, SAS : https://symfony.com/ -// Submitted by Fabien Potencier -*.sensiosite.cloud -*.s5y.io - // Syncloud : https://syncloud.org // Submitted by Boris Rybalkin syncloud.it @@ -15474,14 +15470,11 @@ webspace.rocks lima.zone // TransIP : https://www.transip.nl -// Submitted by Rory Breuk +// Submitted by Rory Breuk and Cedric Dubois *.transurl.be *.transurl.eu -*.transurl.nl - -// TransIP: https://www.transip.nl -// Submitted by Cedric Dubois site.transip.me +*.transurl.nl // TuxFamily : http://tuxfamily.org // Submitted by TuxFamily administrators @@ -15577,9 +15570,11 @@ express.val.run web.val.run // Vercel, Inc : https://vercel.com/ -// Submitted by Connor Davis +// Submitted by Max Leiter vercel.app +v0.build vercel.dev +vusercontent.net now.sh // VeryPositive SIA : http://very.lv @@ -15653,12 +15648,6 @@ toolforge.org wmcloud.org wmflabs.org -// William Harrison : https://wdharrison.com -// Submitted by William Harrison -wdh.app -preview.wdh.app -t.hrsn.net - // WISP : https://wisp.gg // Submitted by Stepan Fedotov panel.gg diff --git a/tools/internal/domain/domain.go b/tools/internal/domain/domain.go index 9bc04d6c3..466356057 100644 --- a/tools/internal/domain/domain.go +++ b/tools/internal/domain/domain.go @@ -8,6 +8,7 @@ import ( "fmt" "slices" "strings" + "sync" "golang.org/x/net/idna" "golang.org/x/text/collate" @@ -92,6 +93,19 @@ func (d Name) String() string { return b.String() } +// ASCIIString returns the domain name in its canonicalized ASCII (aka +// "punycode") form. +func (d Name) ASCIIString() string { + var b strings.Builder + for i := len(d.labels) - 1; i >= 0; i-- { + b.WriteString(d.labels[i].ASCIIString()) + if i != 0 { + b.WriteByte('.') + } + } + return b.String() +} + // Compare compares domain names. It returns -1 if d < e, +1 if d > e, // and 0 if d == e. // @@ -176,6 +190,19 @@ func ParseLabel(s string) (Label, error) { func (l Label) String() string { return l.label } +func (l Label) ASCIIString() string { + ret, err := domainValidator.ToASCII(l.label) + if err != nil { + // This should be impossible. Domain labels can only be + // created by ParseLabel, which applies IDNA validation and + // produces a canonical U-label. We're just converting from + // U-label representation to A-label, which is guaranteed to + // succeed given a valid U-label. + panic(fmt.Sprintf("impossible: U-label to A-label conversion failed: %v", err)) + } + return ret +} + // Compare compares domain labels. It returns -1 if l < m, +1 if l > m, // and 0 if l == m. // @@ -203,10 +230,7 @@ func (l Label) Compare(m Label) int { // If two labels aren't equal, we are free to order them however // we want. We choose to order them with the English Unicode // collation. - var buf collate.Buffer - kl := labelCollator.KeyFromString(&buf, l.label) - km := labelCollator.KeyFromString(&buf, m.label) - if res := bytes.Compare(kl, km); res != 0 { + if res := compareLabel(l, m); res != 0 { return res } @@ -297,4 +321,18 @@ var domainValidator = idna.New( // byte compare. However, this option is buggy and silently ignored in // some cases (https://github.com/golang/go/issues/68379), so we do // this tie breaking ourselves in Label.Compare. +var labelCollatorMu sync.Mutex var labelCollator = collate.New(language.English) + +func compareLabel(a, b Label) int { + // Unfortunately individual collators are not safe for concurrent + // use. Wrap them in a global mutex. We could also construct a new + // collator for each use, but that ends up being more expensive + // and less performant than sharing one collator with a mutex. + labelCollatorMu.Lock() + defer labelCollatorMu.Unlock() + var buf collate.Buffer + kl := labelCollator.KeyFromString(&buf, a.label) + km := labelCollator.KeyFromString(&buf, b.label) + return bytes.Compare(kl, km) +} diff --git a/tools/internal/github/pr.go b/tools/internal/github/pr.go index 824e01cc9..4c2a0b5d7 100644 --- a/tools/internal/github/pr.go +++ b/tools/internal/github/pr.go @@ -60,52 +60,49 @@ func (c *Client) PSLForPullRequest(ctx context.Context, prNum int) (withoutPR, w return nil, nil, err } - if state := pr.GetState(); state != "open" { - return nil, nil, fmt.Errorf("cannot get PSL for PR %d with status %q", prNum, state) + mergeCommit := pr.GetMergeCommitSHA() + if mergeCommit == "" { + return nil, nil, fmt.Errorf("no merge commit available for PR %d", prNum) } - if !pr.GetMergeable() { - return nil, nil, fmt.Errorf("cannot get PSL for PR %d, needs rebase", prNum) - } - trialMergeCommit := pr.GetMergeCommitSHA() - if trialMergeCommit == "" { - return nil, nil, fmt.Errorf("no trial merge commit available for PR %d", prNum) - } - - prHeadCommit := pr.GetHead().GetSHA() - if prHeadCommit == "" { - return nil, nil, fmt.Errorf("no commit SHA available for head of PR %d", prNum) - } - - // We want to return the trial merge commit's PSL as withPR, and - // the non-PR parent of that merge as withoutPR. Github only - // provides information about the trial merge commit and the PR - // head commit in the PR API. It also provides a "base" ref, but - // empirical evidence shows this points at some random commit - // somewhere and updates based on unclear triggers. IOW, it is - // _not_ "master without the PR applied". - // - // Instead, we have to ask the git API for information about the - // trial merge commit, and find the correct withoutPR SHA from - // that. - commitInfo, _, err := c.apiClient().Git.GetCommit(ctx, c.owner(), c.repo(), trialMergeCommit) + commitInfo, _, err := c.apiClient().Git.GetCommit(ctx, c.owner(), c.repo(), mergeCommit) if err != nil { - return nil, nil, fmt.Errorf("getting info for trial merge SHA %q: %w", trialMergeCommit, err) + return nil, nil, fmt.Errorf("getting info for trial merge SHA %q: %w", mergeCommit, err) } + var beforeMergeCommit string - if numParents := len(commitInfo.Parents); numParents != 2 { - return nil, nil, fmt.Errorf("unexpected parent count %d for trial merge commit on PR %d, expected 2 parents", numParents, prNum) - } - if commitInfo.Parents[0].GetSHA() == prHeadCommit { - beforeMergeCommit = commitInfo.Parents[1].GetSHA() - } else { + if pr.GetMerged() && len(commitInfo.Parents) == 1 { + // PR was merged, PSL policy is to use squash-and-merge, so + // the pre-PR commit is simply the parent of the merge commit. beforeMergeCommit = commitInfo.Parents[0].GetSHA() + } else if !pr.GetMergeable() { + // PR isn't merged, and there's a merge conflict that prevents + // us from knowing what the pre- and post-merge states are. + return nil, nil, fmt.Errorf("cannot get PSL for PR %d, needs rebase", prNum) + } else { + // PR is either open, or it was merged without squashing. In + // both cases, mergeCommit has 2 parents: one is the PR head + // commit, and the other is the master branch without the PR's + // changes. + if numParents := len(commitInfo.Parents); numParents != 2 { + return nil, nil, fmt.Errorf("unexpected parent count %d for trial merge commit on PR %d, expected 2 parents", numParents, prNum) + } + + prHeadCommit := pr.GetHead().GetSHA() + if prHeadCommit == "" { + return nil, nil, fmt.Errorf("no commit SHA available for head of PR %d", prNum) + } + if commitInfo.Parents[0].GetSHA() == prHeadCommit { + beforeMergeCommit = commitInfo.Parents[1].GetSHA() + } else { + beforeMergeCommit = commitInfo.Parents[0].GetSHA() + } } withoutPR, err = c.PSLForHash(ctx, beforeMergeCommit) if err != nil { return nil, nil, err } - withPR, err = c.PSLForHash(ctx, trialMergeCommit) + withPR, err = c.PSLForHash(ctx, mergeCommit) if err != nil { return nil, nil, err } diff --git a/tools/internal/parser/diff.go b/tools/internal/parser/diff.go index 69f5df335..fab0d540c 100644 --- a/tools/internal/parser/diff.go +++ b/tools/internal/parser/diff.go @@ -2,7 +2,6 @@ package parser import ( "fmt" - "strings" ) // SetBaseVersion sets the list's base of comparison to old, and @@ -304,15 +303,7 @@ func (d *differ) makeKey(b Block, parentKey string) string { // indirectly dirty the block, because the metadata comment // includes the entire comment text in its identity, and will // dirty the parent Suffixes. - // - // Two temporary exceptions: TransIP and MetaCentrum both have - // two blocks each, with different contact emails. Until those - // are fixed, also include the maintainer email in the - // identity to avoid constant false positives. ret := fmt.Sprintf("%s;Suffixes,%q", parentKey, v.Info.Name) - if strings.Contains(v.Info.Name, "MetaCentrum") || strings.Contains(v.Info.Name, "TransIP") { - ret += fmt.Sprintf(",%v", v.Info.Maintainers) - } return ret case *Suffix: return fmt.Sprintf("%s;Suffix,%q", parentKey, v.Domain) diff --git a/tools/internal/parser/exceptions.go b/tools/internal/parser/exceptions.go index 18adf3da6..b1d8d564f 100644 --- a/tools/internal/parser/exceptions.go +++ b/tools/internal/parser/exceptions.go @@ -24,7 +24,7 @@ func exemptFromSorting(entity string) bool { // missingEmail are source code blocks in the private domains section // that are allowed to lack email contact information. var missingEmail = []string{ - "611coin", + "611 blockchain domain name system", "c.la", "co.ca", "DynDNS.com", diff --git a/tools/internal/parser/unicode.go b/tools/internal/parser/unicode.go index 6842ce924..4bda84c7c 100644 --- a/tools/internal/parser/unicode.go +++ b/tools/internal/parser/unicode.go @@ -2,6 +2,7 @@ package parser import ( "bytes" + "sync" "golang.org/x/text/collate" "golang.org/x/text/language" @@ -67,6 +68,14 @@ func compareCommentText(a string, b string) int { // corresponding "sort keys", and then bytes.Compare those. There // are more exhaustive tests for sort key computation, so there is // higher confidence that it works correctly. + // + // Unfortunately individual collators are also not safe for + // concurrent use. Wrap them in a global mutex. We could also + // construct a new collator for each use, but that ends up being + // more expensive and less performant than sharing one collator + // with a mutex. + commentCollatorMu.Lock() + defer commentCollatorMu.Unlock() var buf collate.Buffer ka := commentCollator.KeyFromString(&buf, a) kb := commentCollator.KeyFromString(&buf, b) @@ -77,3 +86,4 @@ func compareCommentText(a string, b string) int { // non-suffix text. See the comment at the start of this file for more // details. var commentCollator = collate.New(language.MustParse("en")) +var commentCollatorMu sync.Mutex diff --git a/tools/psltool/psltool.go b/tools/psltool/psltool.go index 87fb305c7..981eba4b5 100644 --- a/tools/psltool/psltool.go +++ b/tools/psltool/psltool.go @@ -14,6 +14,7 @@ import ( "strconv" "strings" "syscall" + "unicode" "github.com/creachadair/command" "github.com/creachadair/flax" @@ -42,11 +43,14 @@ By default, the given file is updated in place.`, }, { Name: "validate", - Usage: "", + Usage: "", Help: `Check that a file is a valid PSL file. Validation includes basic issues like parse errors, as well as -conformance with the PSL project's style rules and policies.`, +conformance with the PSL project's style rules and policies. + +The argument can be either a local file, or a git commit hash to fetch +from https://github.com/publicsuffix/list.`, SetFlags: command.Flags(flax.MustBind, &validateArgs), Run: command.Adapt(runValidate), }, @@ -132,10 +136,30 @@ var validateArgs struct { Online bool `flag:"online-checks,Run validations that require querying third-party servers"` } -func runValidate(env *command.Env, path string) error { - bs, err := os.ReadFile(path) +func isHex(s string) bool { + for _, r := range s { + if !unicode.In(r, unicode.ASCII_Hex_Digit) { + return false + } + } + return true +} + +func runValidate(env *command.Env, pathOrHash string) error { + var bs []byte + var err error + if _, err = os.Stat(pathOrHash); err == nil { + // input is a local file + bs, err = os.ReadFile(pathOrHash) + } else if isHex(pathOrHash) { + // input looks like a git hash + client := github.Client{} + bs, err = client.PSLForHash(context.Background(), pathOrHash) + } else { + return fmt.Errorf("Failed to read PSL file %q, not a local file or a git commit hash", pathOrHash) + } if err != nil { - return fmt.Errorf("Failed to read PSL file: %w", err) + return fmt.Errorf("Failed to read PSL file %q: %w", pathOrHash, err) } psl, errs := parser.Parse(bs)