diff --git a/CHANGELOG.md b/CHANGELOG.md index 34c26992438..8df9d7452f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ * `cortex_alertmanager_state_replication_failed_total` * `cortex_alertmanager_alerts` * `cortex_alertmanager_silences` -* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.query-engine=mimir`. #9367 #9368 #9398 #9399 #9403 #9417 #9418 #9419 #9420 #9482 #9504 #9505 #9507 #9518 #9531 #9532 #9533 #9553 #9558 #9588 #9589 +* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.query-engine=mimir`. #9367 #9368 #9398 #9399 #9403 #9417 #9418 #9419 #9420 #9482 #9504 #9505 #9507 #9518 #9531 #9532 #9533 #9553 #9558 #9588 #9589 #9639 * [FEATURE] Query-frontend: added experimental configuration options `query-frontend.cache-errors` and `query-frontend.results-cache-ttl-for-errors` to allow non-transient responses to be cached. When set to `true` error responses from hitting limits or bad data are cached for a short TTL. #9028 * [FEATURE] gRPC: Support S2 compression. #9322 * `-alertmanager.alertmanager-client.grpc-compression=s2` @@ -28,6 +28,7 @@ * [FEATURE] Alertmanager: limit added for maximum size of the Grafana configuration (`-alertmanager.max-config-size-bytes`). #9402 * [FEATURE] Ingester: Experimental support for ingesting out-of-order native histograms. This is disabled by default and can be enabled by setting `-ingester.ooo-native-histograms-ingestion-enabled` to `true`. #7175 * [FEATURE] Distributor: Added `-api.skip-label-count-validation-header-enabled` option to allow skipping label count validation on the HTTP write path based on `X-Mimir-SkipLabelCountValidation` header being `true` or not. #9576 +* [FEATURE] Ruler: Add experimental support for caching the contents of rule groups. This is disabled by default and can be enabled by setting `-ruler-storage.cache.rule-group-enabled`. #9595 * [ENHANCEMENT] mimirtool: Adds bearer token support for mimirtool's analyze ruler/prometheus commands. #9587 * [ENHANCEMENT] Ruler: Support `exclude_alerts` parameter in `/api/v1/rules` endpoint. #9300 * [ENHANCEMENT] Distributor: add a metric to track tenants who are sending newlines in their label values called `cortex_distributor_label_values_with_newlines_total`. #9400 @@ -68,6 +69,7 @@ ### Query-tee +* [CHANGE] Don't compare responses for cancelled requests. #9640 * [FEATURE] Added `-proxy.compare-skip-samples-before` to skip samples before the given time when comparing responses. The time can be in RFC3339 format (or) RFC3339 without the timezone and seconds (or) date only. #9515 ### Documentation diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 056c87e0e17..31a8f9f6463 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -13669,6 +13669,17 @@ "required": false, "desc": "", "blockEntries": [ + { + "kind": "field", + "name": "rule_group_enabled", + "required": false, + "desc": "Enabling caching of rule group contents if a cache backend is configured.", + "fieldValue": null, + "fieldDefaultValue": false, + "fieldFlag": "ruler-storage.cache.rule-group-enabled", + "fieldType": "boolean", + "fieldCategory": "experimental" + }, { "kind": "field", "name": "backend", diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index afed2ba684a..07a3208b5b1 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -2497,6 +2497,8 @@ Usage of ./cmd/mimir/mimir: [deprecated] Username to use when connecting to Redis. -ruler-storage.cache.redis.write-timeout duration [deprecated] Client write timeout. (default 3s) + -ruler-storage.cache.rule-group-enabled + [experimental] Enabling caching of rule group contents if a cache backend is configured. -ruler-storage.filesystem.dir string Local filesystem storage directory. (default "ruler") -ruler-storage.gcs.bucket-name string diff --git a/development/mimir-monolithic-mode/docker-compose.yml b/development/mimir-monolithic-mode/docker-compose.yml index 7a8236a0083..95c0431a6d3 100644 --- a/development/mimir-monolithic-mode/docker-compose.yml +++ b/development/mimir-monolithic-mode/docker-compose.yml @@ -4,17 +4,17 @@ services: image: consul:1.15 # latest tag not supported command: [ "agent", "-dev" ,"-client=0.0.0.0", "-log-level=info" ] ports: - - 8500:8500 + - 8510:8500 minio: image: minio/minio - command: [ "server", "--console-address", ":9001", "/data" ] + command: [ "server", "--console-address", ":9101", "/data" ] environment: - MINIO_ROOT_USER=mimir - MINIO_ROOT_PASSWORD=supersecret ports: - - 9000:9000 - - 9001:9001 + - 9100:9100 + - 9101:9101 volumes: - .data-minio:/data:delegated @@ -26,7 +26,7 @@ services: volumes: - ./config:/etc/prometheus ports: - - 9090:9090 + - 9190:9090 grafana: environment: @@ -48,7 +48,7 @@ services: volumes: - ./config:/etc/agent-config ports: - - 9091:9091 + - 9191:9091 grafana-alloy: profiles: @@ -63,7 +63,7 @@ services: jaeger: image: jaegertracing/all-in-one ports: - - 16686:16686 + - 16681:16686 - "14268" mimir-1: @@ -71,7 +71,7 @@ services: context: . dockerfile: dev.dockerfile image: mimir - command: ["sh", "-c", "sleep 3 && exec ./mimir -config.file=./config/mimir.yaml -target=all -server.http-listen-port=8001 -server.grpc-listen-port=9001"] + command: ["sh", "-c", "sleep 3 && exec ./mimir -config.file=./config/mimir.yaml -target=all -server.http-listen-port=8001 -server.grpc-listen-port=9101"] depends_on: - consul - minio @@ -83,7 +83,7 @@ services: - JAEGER_SAMPLER_PARAM=1 - JAEGER_REPORTER_MAX_QUEUE_SIZE=1000 ports: - - 8001:8001 + - 8101:8001 volumes: - ./config:/mimir/config - .data-mimir-1:/data:delegated @@ -105,7 +105,7 @@ services: - JAEGER_SAMPLER_PARAM=1 - JAEGER_REPORTER_MAX_QUEUE_SIZE=1000 ports: - - 8002:8002 + - 8102:8002 volumes: - ./config:/mimir/config - .data-mimir-2:/data:delegated diff --git a/development/mimir-read-write-mode/docker-compose.jsonnet b/development/mimir-read-write-mode/docker-compose.jsonnet index efe33445083..ff2d772b6a8 100644 --- a/development/mimir-read-write-mode/docker-compose.jsonnet +++ b/development/mimir-read-write-mode/docker-compose.jsonnet @@ -71,7 +71,7 @@ std.manifestYamlDoc({ 'QUERY_FRONTEND_HOST=mimir-read-1:8080', 'COMPACTOR_HOST=mimir-backend-1:8080', ], - ports: ['8080:8080'], + ports: ['8780:8080'], volumes: ['../common/config:/etc/nginx/templates'], }, }, @@ -79,11 +79,11 @@ std.manifestYamlDoc({ minio:: { minio: { image: 'minio/minio', - command: ['server', '--console-address', ':9001', '/data'], + command: ['server', '--console-address', ':9701', '/data'], environment: ['MINIO_ROOT_USER=mimir', 'MINIO_ROOT_PASSWORD=supersecret'], ports: [ - '9000:9000', - '9001:9001', + '9700:9700', + '9701:9701', ], volumes: ['.data-minio:/data:delegated'], }, @@ -116,7 +116,7 @@ std.manifestYamlDoc({ image: 'grafana/agent:v0.37.3', command: ['-config.file=/etc/agent-config/grafana-agent.yaml', '-metrics.wal-directory=/tmp', '-server.http.address=127.0.0.1:9091'], volumes: ['./config:/etc/agent-config'], - ports: ['9091:9091'], + ports: ['9791:9091'], }, }, @@ -131,7 +131,7 @@ std.manifestYamlDoc({ volumes: [ './config:/etc/prometheus', ], - ports: ['9090:9090'], + ports: ['9790:9090'], }, }, @@ -168,7 +168,7 @@ std.manifestYamlDoc({ ], hostname: options.name, // Only publish HTTP port, but not gRPC one. - ports: ['%d:8080' % options.publishedHttpPort], + ports: ['%d:8080' % (options.publishedHttpPort + 700)], depends_on: options.dependsOn, volumes: ['./config:/mimir/config', './activity:/activity'] + options.extraVolumes, }, diff --git a/development/mimir-read-write-mode/docker-compose.yml b/development/mimir-read-write-mode/docker-compose.yml index 08d31a521ca..461b06b30d8 100644 --- a/development/mimir-read-write-mode/docker-compose.yml +++ b/development/mimir-read-write-mode/docker-compose.yml @@ -15,7 +15,7 @@ - "-server.http.address=127.0.0.1:9091" "image": "grafana/agent:v0.37.3" "ports": - - "9091:9091" + - "9791:9091" "volumes": - "./config:/etc/agent-config" "memcached": @@ -35,7 +35,7 @@ "hostname": "mimir-backend-1" "image": "mimir" "ports": - - "8006:8080" + - "8706:8080" "volumes": - "./config:/mimir/config" - "./activity:/activity" @@ -54,7 +54,7 @@ "hostname": "mimir-backend-2" "image": "mimir" "ports": - - "8007:8080" + - "8707:8080" "volumes": - "./config:/mimir/config" - "./activity:/activity" @@ -73,7 +73,7 @@ "hostname": "mimir-read-1" "image": "mimir" "ports": - - "8004:8080" + - "8704:8080" "volumes": - "./config:/mimir/config" - "./activity:/activity" @@ -92,7 +92,7 @@ "hostname": "mimir-read-2" "image": "mimir" "ports": - - "8005:8080" + - "8705:8080" "volumes": - "./config:/mimir/config" - "./activity:/activity" @@ -111,7 +111,7 @@ "hostname": "mimir-write-1" "image": "mimir" "ports": - - "8001:8080" + - "8701:8080" "volumes": - "./config:/mimir/config" - "./activity:/activity" @@ -131,7 +131,7 @@ "hostname": "mimir-write-2" "image": "mimir" "ports": - - "8002:8080" + - "8702:8080" "volumes": - "./config:/mimir/config" - "./activity:/activity" @@ -151,7 +151,7 @@ "hostname": "mimir-write-3" "image": "mimir" "ports": - - "8003:8080" + - "8703:8080" "volumes": - "./config:/mimir/config" - "./activity:/activity" @@ -160,15 +160,15 @@ "command": - "server" - "--console-address" - - ":9001" + - ":9701" - "/data" "environment": - "MINIO_ROOT_USER=mimir" - "MINIO_ROOT_PASSWORD=supersecret" "image": "minio/minio" "ports": - - "9000:9000" - - "9001:9001" + - "9700:9700" + - "9701:9701" "volumes": - ".data-minio:/data:delegated" "nginx": @@ -182,7 +182,7 @@ "hostname": "nginx" "image": "nginxinc/nginx-unprivileged:1.22-alpine" "ports": - - "8080:8080" + - "8780:8080" "volumes": - "../common/config:/etc/nginx/templates" "prometheus": @@ -192,6 +192,6 @@ - "--enable-feature=native-histograms" "image": "prom/prometheus:v2.53.0" "ports": - - "9090:9090" + - "9790:9090" "volumes": - "./config:/etc/prometheus" diff --git a/docs/sources/mimir/configure/about-versioning.md b/docs/sources/mimir/configure/about-versioning.md index 64b69a35081..43e6af006a1 100644 --- a/docs/sources/mimir/configure/about-versioning.md +++ b/docs/sources/mimir/configure/about-versioning.md @@ -71,6 +71,8 @@ The following features are currently experimental: - Allow control over rule sync intervals. - `ruler.outbound-sync-queue-poll-interval` - `ruler.inbound-sync-queue-poll-interval` + - Cache rule group contents. + - `-ruler-storage.cache.rule-group-enabled` - Distributor - Metrics relabeling - `-distributor.metric-relabeling-enabled` diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index 4ab17090b42..a7480f68739 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -2166,6 +2166,11 @@ local: [directory: | default = ""] cache: + # (experimental) Enabling caching of rule group contents if a cache backend is + # configured. + # CLI flag: -ruler-storage.cache.rule-group-enabled + [rule_group_enabled: | default = false] + # Backend for ruler storage cache, if not empty. The cache is supported for # any storage backend except "local". Supported values: memcached, redis. # CLI flag: -ruler-storage.cache.backend diff --git a/docs/sources/mimir/manage/mimir-runbooks/_index.md b/docs/sources/mimir/manage/mimir-runbooks/_index.md index ab23693a68c..0ca3da61303 100644 --- a/docs/sources/mimir/manage/mimir-runbooks/_index.md +++ b/docs/sources/mimir/manage/mimir-runbooks/_index.md @@ -936,7 +936,7 @@ How to **investigate**: - Check the number of endpoints matching the `gossip-ring` service: ``` - kubectl --namespace get endpoints gossip-ring + kubectl --namespace get endpoints gossip-ring -ojson | jq '.subsets[].addresses | length' ``` - If the number of endpoints is 1000 then it means you reached the Kubernetes limit, the endpoints get truncated and you could be hit by [this bug](https://github.com/kubernetes/kubernetes/issues/127370). Having more than 1000 pods diff --git a/go.mod b/go.mod index 9fc083a6ef0..850519209f1 100644 --- a/go.mod +++ b/go.mod @@ -34,7 +34,7 @@ require ( github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b github.com/pkg/errors v0.9.1 github.com/prometheus/alertmanager v0.27.0 - github.com/prometheus/client_golang v1.20.4 + github.com/prometheus/client_golang v1.20.5 github.com/prometheus/client_model v0.6.1 github.com/prometheus/common v0.59.1 github.com/prometheus/prometheus v1.99.0 @@ -73,7 +73,7 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/okzk/sdnotify v0.0.0-20240725214427-1c1fdd37c5ac github.com/prometheus/procfs v0.15.1 - github.com/shirou/gopsutil/v4 v4.24.8 + github.com/shirou/gopsutil/v4 v4.24.9 github.com/thanos-io/objstore v0.0.0-20241010161353-f90c89a0ef90 github.com/twmb/franz-go v1.17.1 github.com/twmb/franz-go/pkg/kadm v1.13.0 @@ -89,7 +89,7 @@ require ( golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c golang.org/x/term v0.25.0 google.golang.org/api v0.196.0 - google.golang.org/protobuf v1.34.2 + google.golang.org/protobuf v1.35.1 sigs.k8s.io/kustomize/kyaml v0.16.0 ) @@ -105,6 +105,7 @@ require ( github.com/Masterminds/sprig/v3 v3.2.1 // indirect github.com/at-wat/mqtt-go v0.19.4 // indirect github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 // indirect + github.com/ebitengine/purego v0.8.0 // indirect github.com/go-ini/ini v1.67.0 // indirect github.com/go-jose/go-jose/v4 v4.0.1 // indirect github.com/go-ole/go-ole v1.2.6 // indirect @@ -127,7 +128,6 @@ require ( github.com/pires/go-proxyproto v0.7.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/ryanuber/go-glob v1.0.0 // indirect - github.com/shoenig/go-m1cpu v0.1.6 // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/spf13/cast v1.5.0 // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect @@ -226,7 +226,7 @@ require ( github.com/josharian/intern v1.0.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/julienschmidt/httprouter v1.3.0 // indirect - github.com/klauspost/compress v1.17.10 + github.com/klauspost/compress v1.17.11 github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect @@ -275,14 +275,14 @@ require ( golang.org/x/tools v0.26.0 // indirect google.golang.org/genproto v0.0.0-20240903143218-8af14fe29dc1 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240930140551-af27646dc61f + google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect sigs.k8s.io/yaml v1.4.0 // indirect ) // Using a fork of Prometheus with Mimir-specific changes. -replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20241015102654-eaa7eae2d877 +replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20241016140351-5e9a771537ba // Replace memberlist with our fork which includes some fixes that haven't been // merged upstream yet: diff --git a/go.sum b/go.sum index 2f00d2f0e6c..61fe6285365 100644 --- a/go.sum +++ b/go.sum @@ -965,6 +965,8 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/ebitengine/purego v0.8.0 h1:JbqvnEzRvPpxhCJzJJ2y0RbiZ8nyjccVUrSM3q+GvvE= +github.com/ebitengine/purego v0.8.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/edsrzf/mmap-go v1.1.0 h1:6EUwBLQ/Mcr1EYLE4Tn1VdW1A4ckqCQWZBw8Hr0kjpQ= github.com/edsrzf/mmap-go v1.1.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= github.com/efficientgo/core v1.0.0-rc.0.0.20221201130417-ba593f67d2a4 h1:rydBwnBoywKQMjWF0z8SriYtQ+uUcaFsxuijMjJr5PI= @@ -1268,8 +1270,8 @@ github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56 h1:X8IKQ0wu40wp github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn7VNWBIvM71O2QsgfgW9fRXZNR0DXe6pDU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/grafana/mimir-prometheus v0.0.0-20241015102654-eaa7eae2d877 h1:HRrL6+agPwe5uuKwI7mdR8f5rUERPB8HuDw3UX10kHM= -github.com/grafana/mimir-prometheus v0.0.0-20241015102654-eaa7eae2d877/go.mod h1:oyDm7JaLUh+QGuGkC7iXC8IyTUq5rlh1ba2CRm9DlVg= +github.com/grafana/mimir-prometheus v0.0.0-20241016140351-5e9a771537ba h1:vTiUTY+yGoXJAxKA8h2GC6ynHoxbuMjTdp/pVrXlE/E= +github.com/grafana/mimir-prometheus v0.0.0-20241016140351-5e9a771537ba/go.mod h1:AvqWbZV+ePQxe3xQT4IX1iP1/evmMJKI7sNxzx20S6Y= github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956 h1:em1oddjXL8c1tL0iFdtVtPloq2hRPen2MJQKoAWpxu0= github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956/go.mod h1:qtI1ogk+2JhVPIXVc6q+NHziSmy2W5GbdQZFUHADCBU= github.com/grafana/prometheus-alertmanager v0.25.1-0.20240924175849-b8b7c2c74eb6 h1:nT8QXdJo6wHMBcF0xEoXxEWkoUZOyzV/jyi/u9l7YEk= @@ -1411,8 +1413,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= -github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0= -github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= @@ -1581,8 +1583,8 @@ github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3O github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= -github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -1641,12 +1643,8 @@ github.com/sercand/kuberesolver/v5 v5.1.1 h1:CYH+d67G0sGBj7q5wLK61yzqJJ8gLLC8aep github.com/sercand/kuberesolver/v5 v5.1.1/go.mod h1:Fs1KbKhVRnB2aDWN12NjKCB+RgYMWZJ294T3BtmVCpQ= github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= -github.com/shirou/gopsutil/v4 v4.24.8 h1:pVQjIenQkIhqO81mwTaXjTzOMT7d3TZkf43PlVFHENI= -github.com/shirou/gopsutil/v4 v4.24.8/go.mod h1:wE0OrJtj4dG+hYkxqDH3QiBICdKSf04/npcvLLc/oRg= -github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM= -github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= -github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= -github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= +github.com/shirou/gopsutil/v4 v4.24.9 h1:KIV+/HaHD5ka5f570RZq+2SaeFsb/pq+fp2DGNWYoOI= +github.com/shirou/gopsutil/v4 v4.24.9/go.mod h1:3fkaHNeYsUFCGZ8+9vZVWtbyM1k2eRnlL+bWO8Bxa/Q= github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c h1:aqg5Vm5dwtvL+YgDpBcK1ITf3o96N/K7/wsRXQnUTEs= @@ -2499,8 +2497,8 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20230803162519-f966b187b2e5/go. google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= google.golang.org/genproto/googleapis/rpc v0.0.0-20240521202816-d264139d666e/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240930140551-af27646dc61f h1:cUMEy+8oS78BWIH9OWazBkzbr090Od9tWBNtZHkOhf0= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240930140551-af27646dc61f/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 h1:X58yt85/IXCx0Y3ZwN6sEIKZzQtDEYaBWrDvErdXrRE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc= google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= @@ -2525,8 +2523,8 @@ google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc h1:2gGKlE2+asNV9m7xrywl36YYNnBG5ZQ0r/BOOxqPpmk= gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk= diff --git a/operations/helm/charts/mimir-distributed/Chart.yaml b/operations/helm/charts/mimir-distributed/Chart.yaml index 0f16bea7b28..9dc0c302bb3 100644 --- a/operations/helm/charts/mimir-distributed/Chart.yaml +++ b/operations/helm/charts/mimir-distributed/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 -version: 5.5.0 -appVersion: 2.14.0 +version: 5.6.0-weekly.312 +appVersion: r312 description: "Grafana Mimir" home: https://grafana.com/docs/helm-charts/mimir-distributed/latest/ icon: https://grafana.com/static/img/logos/logo-mimir.svg diff --git a/operations/helm/charts/mimir-distributed/README.md b/operations/helm/charts/mimir-distributed/README.md index a7eb1b4d789..dc4ae21ca79 100644 --- a/operations/helm/charts/mimir-distributed/README.md +++ b/operations/helm/charts/mimir-distributed/README.md @@ -4,7 +4,7 @@ Helm chart for deploying [Grafana Mimir](https://grafana.com/docs/mimir/latest/) For the full documentation, visit [Grafana mimir-distributed Helm chart documentation](https://grafana.com/docs/helm-charts/mimir-distributed/latest/). -> **Note:** The documentation version is derived from the Helm chart version which is 5.5.0. +> **Note:** The documentation version is derived from the Helm chart version which is 5.6.0-weekly.312. When upgrading from Helm chart version 4.X, please see [Migrate the Helm chart from version 4.x to 5.0](https://grafana.com/docs/helm-charts/mimir-distributed/latest/migration-guides/migrate-helm-chart-4.x-to-5.0/). When upgrading from Helm chart version 3.x, please see [Migrate from single zone to zone-aware replication with Helm](https://grafana.com/docs/helm-charts/mimir-distributed/latest/migration-guides/migrate-from-single-zone-with-helm/). @@ -14,7 +14,7 @@ When upgrading from Helm chart version 2.1, please see [Upgrade the Grafana Mimi # mimir-distributed -![Version: 5.5.0](https://img.shields.io/badge/Version-5.5.0-informational?style=flat-square) ![AppVersion: 2.14.0](https://img.shields.io/badge/AppVersion-2.14.0-informational?style=flat-square) +![Version: 5.6.0-weekly.312](https://img.shields.io/badge/Version-5.6.0--weekly.312-informational?style=flat-square) ![AppVersion: r312](https://img.shields.io/badge/AppVersion-r312-informational?style=flat-square) Grafana Mimir diff --git a/operations/helm/charts/mimir-distributed/RELEASE.md b/operations/helm/charts/mimir-distributed/RELEASE.md index f0141b0dcf1..c6c3ac89436 100644 --- a/operations/helm/charts/mimir-distributed/RELEASE.md +++ b/operations/helm/charts/mimir-distributed/RELEASE.md @@ -96,6 +96,8 @@ Weekly releases have the version `x.y.z-weekly.w`, for example `3.1.0-weekly.196 For example, `2.6.0`. + - Add a changelog entry in `mimir-distributed/CHANGELOG.md` about upgading the chart's versions of Mimir and GEM. + - Create or update the release notes in `docs/sources/helm-charts/mimir-distributed/release-notes` directory. The release notes should refer to the correct Mimir and GEM versions and their specific documentation version. @@ -167,6 +169,8 @@ The [release process](https://github.com/grafana/mimir/blob/main/.github/workflo - There shouldn't be anymore update needed in documentation because that has been done in the release candidate step above. + > **Note:** Check that the final versions of Mimir and GEM defined in the chart match those mentioned in the changelog and the release notes. + - From the root directory of the repository, run `make doc` to update [README.md](https://github.com/grafana/mimir/blob/main/operations/helm/charts/mimir-distributed/README.md) file. 1. Open PR to release branch diff --git a/operations/helm/charts/mimir-distributed/values.yaml b/operations/helm/charts/mimir-distributed/values.yaml index 877835848e2..03fb978abf6 100644 --- a/operations/helm/charts/mimir-distributed/values.yaml +++ b/operations/helm/charts/mimir-distributed/values.yaml @@ -34,7 +34,7 @@ image: # -- Grafana Mimir container image repository. Note: for Grafana Enterprise Metrics use the value 'enterprise.image.repository' repository: grafana/mimir # -- Grafana Mimir container image tag. Note: for Grafana Enterprise Metrics use the value 'enterprise.image.tag' - tag: 2.14.0 + tag: r312-ef3ece3 # -- Container pull policy - shared between Grafana Mimir and Grafana Enterprise Metrics pullPolicy: IfNotPresent # -- Optionally specify an array of imagePullSecrets - shared between Grafana Mimir and Grafana Enterprise Metrics @@ -3862,7 +3862,7 @@ enterprise: # -- Grafana Enterprise Metrics container image repository. Note: for Grafana Mimir use the value 'image.repository' repository: grafana/enterprise-metrics # -- Grafana Enterprise Metrics container image tag. Note: for Grafana Mimir use the value 'image.tag' - tag: v2.14.0 + tag: r312-bedb3415 # Note: pullPolicy and optional pullSecrets are set in toplevel 'image' section, not here # In order to use Grafana Enterprise Metrics features, you will need to provide the contents of your Grafana Enterprise Metrics diff --git a/pkg/blockbuilder/metrics.go b/pkg/blockbuilder/metrics.go index 59ad431603e..d9db46c6a4f 100644 --- a/pkg/blockbuilder/metrics.go +++ b/pkg/blockbuilder/metrics.go @@ -44,6 +44,7 @@ func newBlockBuilderMetrics(reg prometheus.Registerer) blockBuilderMetrics { } type tsdbBuilderMetrics struct { + processSamplesDiscarded *prometheus.CounterVec compactAndUploadDuration *prometheus.HistogramVec compactAndUploadFailed *prometheus.CounterVec } @@ -51,6 +52,11 @@ type tsdbBuilderMetrics struct { func newTSDBBBuilderMetrics(reg prometheus.Registerer) tsdbBuilderMetrics { var m tsdbBuilderMetrics + m.processSamplesDiscarded = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_blockbuilder_tsdb_process_samples_discarded_total", + Help: "The total number of samples that were discarded while processing records in one partition.", + }, []string{"partition"}) + m.compactAndUploadDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ Name: "cortex_blockbuilder_tsdb_compact_and_upload_duration_seconds", Help: "Time spent compacting and uploading a tsdb of one partition.", diff --git a/pkg/blockbuilder/tsdb.go b/pkg/blockbuilder/tsdb.go index ec4d6394db6..dbae22646d6 100644 --- a/pkg/blockbuilder/tsdb.go +++ b/pkg/blockbuilder/tsdb.go @@ -106,9 +106,11 @@ func (b *TSDBBuilder) Process(ctx context.Context, rec *kgo.Record, lastBlockMax }() var ( - labelsBuilder labels.ScratchBuilder - nonCopiedLabels labels.Labels + labelsBuilder labels.ScratchBuilder + nonCopiedLabels labels.Labels + allSamplesProcessed = true + discardedSamples = 0 ) for _, ts := range req.Timeseries { mimirpb.FromLabelAdaptersOverwriteLabels(&labelsBuilder, ts.Labels, &nonCopiedLabels) @@ -141,10 +143,12 @@ func (b *TSDBBuilder) Process(ctx context.Context, rec *kgo.Record, lastBlockMax } } - // Only abort the processing on a terminal error. - // TODO(v): add metrics for non-terminal errors - if err := checkTSDBAppendError(err); err != nil { - return false, err + if err != nil { + // Only abort the processing on a terminal error. + if err := checkTSDBAppendError(err); err != nil { + return false, err + } + discardedSamples++ } } @@ -183,16 +187,23 @@ func (b *TSDBBuilder) Process(ctx context.Context, rec *kgo.Record, lastBlockMax } } - // Only abort the processing on a terminal error. - // TODO(v): add metrics for non-terminal errors - if err := checkTSDBAppendError(err); err != nil { - return false, err + if err != nil { + // Only abort the processing on a terminal error. + if err := checkTSDBAppendError(err); err != nil { + return false, err + } + discardedSamples++ } } // Exemplars and metadata are not persisted in the block. So we skip them. } + if discardedSamples > 0 { + partitionStr := fmt.Sprintf("%d", tenant.partitionID) + b.metrics.processSamplesDiscarded.WithLabelValues(partitionStr).Add(float64(discardedSamples)) + } + return allSamplesProcessed, app.Commit() } diff --git a/pkg/compactor/blocks_cleaner_test.go b/pkg/compactor/blocks_cleaner_test.go index ab1e6d59e76..cd7dee90368 100644 --- a/pkg/compactor/blocks_cleaner_test.go +++ b/pkg/compactor/blocks_cleaner_test.go @@ -464,7 +464,12 @@ func TestBlocksCleaner_ShouldNotCleanupUserThatDoesntBelongToShardAnymore(t *tes require.ElementsMatch(t, []string{"user-1", "user-2"}, cleaner.lastOwnedUsers) // But there are no metrics for any user, because we did not in fact clean them. - test.AssertGatherAndCompare(t, reg, "", "cortex_bucket_blocks_count") + assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` + # HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. + # TYPE cortex_bucket_blocks_count gauge + `), + "cortex_bucket_blocks_count", + )) // Running cleanUsers again will see that users are no longer owned. require.NoError(t, cleaner.runCleanupWithErr(ctx)) diff --git a/pkg/compactor/compactor_test.go b/pkg/compactor/compactor_test.go index ec087c181b1..bd4afca4eaf 100644 --- a/pkg/compactor/compactor_test.go +++ b/pkg/compactor/compactor_test.go @@ -967,7 +967,7 @@ func TestMultitenantCompactor_ShouldNotCompactBlocksForUsersMarkedForDeletion(t "cortex_compactor_block_cleanup_started_total", "cortex_compactor_block_cleanup_completed_total", "cortex_compactor_block_cleanup_failed_total", "cortex_bucket_blocks_count", "cortex_bucket_blocks_marked_for_deletion_count", "cortex_bucket_index_last_successful_update_timestamp_seconds", } - testutil.AssertGatherAndCompare(t, registry, ` + assert.NoError(t, prom_testutil.GatherAndCompare(registry, strings.NewReader(` # TYPE cortex_compactor_runs_started_total counter # HELP cortex_compactor_runs_started_total Total number of compaction runs started. cortex_compactor_runs_started_total 1 @@ -1006,7 +1006,7 @@ func TestMultitenantCompactor_ShouldNotCompactBlocksForUsersMarkedForDeletion(t # TYPE cortex_compactor_block_cleanup_failed_total counter # HELP cortex_compactor_block_cleanup_failed_total Total number of blocks cleanup runs failed. cortex_compactor_block_cleanup_failed_total 0 - `, testedMetrics...) + `), testedMetrics...)) } func TestMultitenantCompactor_ShouldCompactAllUsersOnShardingEnabledButOnlyOneInstanceRunning(t *testing.T) { diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go index 72a5ad262c2..e6a26355d17 100644 --- a/pkg/distributor/distributor_test.go +++ b/pkg/distributor/distributor_test.go @@ -377,7 +377,7 @@ func TestDistributor_MetricsCleanup(t *testing.T) { d.latestSeenSampleTimestampPerUser.WithLabelValues("userA").Set(1111) d.labelValuesWithNewlinesPerUser.WithLabelValues("userA").Inc() - util_test.AssertGatherAndCompare(t, reg, ` + require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` # HELP cortex_distributor_deduped_samples_total The total number of deduplicated samples. # TYPE cortex_distributor_deduped_samples_total counter cortex_distributor_deduped_samples_total{cluster="cluster1",user="userA"} 1 @@ -420,11 +420,23 @@ func TestDistributor_MetricsCleanup(t *testing.T) { # HELP cortex_distributor_label_values_with_newlines_total Total number of label values with newlines seen at ingestion time. # TYPE cortex_distributor_label_values_with_newlines_total counter cortex_distributor_label_values_with_newlines_total{user="userA"} 1 - `, metrics...) + `), metrics...)) d.cleanupInactiveUser("userA") - util_test.AssertGatherAndCompare(t, reg, ` + require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` + # HELP cortex_distributor_deduped_samples_total The total number of deduplicated samples. + # TYPE cortex_distributor_deduped_samples_total counter + + # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. + # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge + + # HELP cortex_distributor_metadata_in_total The total number of metadata the have come in to the distributor, including rejected. + # TYPE cortex_distributor_metadata_in_total counter + + # HELP cortex_distributor_non_ha_samples_received_total The total number of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels. + # TYPE cortex_distributor_non_ha_samples_received_total counter + # HELP cortex_distributor_received_metadata_total The total number of received metadata, excluding rejected. # TYPE cortex_distributor_received_metadata_total counter cortex_distributor_received_metadata_total{user="userB"} 10 @@ -436,7 +448,16 @@ func TestDistributor_MetricsCleanup(t *testing.T) { # HELP cortex_distributor_received_exemplars_total The total number of received exemplars, excluding rejected and deduped exemplars. # TYPE cortex_distributor_received_exemplars_total counter cortex_distributor_received_exemplars_total{user="userB"} 10 - `, metrics...) + + # HELP cortex_distributor_samples_in_total The total number of samples that have come in to the distributor, including rejected or deduped samples. + # TYPE cortex_distributor_samples_in_total counter + + # HELP cortex_distributor_exemplars_in_total The total number of exemplars that have come in to the distributor, including rejected or deduped exemplars. + # TYPE cortex_distributor_exemplars_in_total counter + + # HELP cortex_distributor_label_values_with_newlines_total Total number of label values with newlines seen at ingestion time. + # TYPE cortex_distributor_label_values_with_newlines_total counter + `), metrics...)) } func TestDistributor_PushRequestRateLimiter(t *testing.T) { @@ -1666,7 +1687,7 @@ func TestDistributor_ExemplarValidation(t *testing.T) { } assert.Equal(t, tc.expectedExemplars, tc.req.Timeseries) - util_test.AssertGatherAndCompare(t, regs[0], tc.expectedMetrics, "cortex_discarded_exemplars_total") + assert.NoError(t, testutil.GatherAndCompare(regs[0], strings.NewReader(tc.expectedMetrics), "cortex_discarded_exemplars_total")) }) } } @@ -7169,7 +7190,7 @@ func TestDistributor_StorageConfigMetrics(t *testing.T) { happyIngesters: 3, replicationFactor: 3, }) - util_test.AssertGatherAndCompare(t, regs[0], ` + assert.NoError(t, testutil.GatherAndCompare(regs[0], strings.NewReader(` # HELP cortex_distributor_replication_factor The configured replication factor. # TYPE cortex_distributor_replication_factor gauge cortex_distributor_replication_factor 3 @@ -7177,7 +7198,7 @@ func TestDistributor_StorageConfigMetrics(t *testing.T) { # HELP cortex_distributor_ingest_storage_enabled Whether writes are being processed via ingest storage. Equal to 1 if ingest storage is enabled, 0 if disabled. # TYPE cortex_distributor_ingest_storage_enabled gauge cortex_distributor_ingest_storage_enabled 0 - `, "cortex_distributor_replication_factor", "cortex_distributor_ingest_storage_enabled") + `), "cortex_distributor_replication_factor", "cortex_distributor_ingest_storage_enabled")) }) t.Run("migration to ingest storage", func(t *testing.T) { @@ -7190,7 +7211,7 @@ func TestDistributor_StorageConfigMetrics(t *testing.T) { happyIngesters: 3, replicationFactor: 3, }) - util_test.AssertGatherAndCompare(t, regs[0], ` + assert.NoError(t, testutil.GatherAndCompare(regs[0], strings.NewReader(` # HELP cortex_distributor_replication_factor The configured replication factor. # TYPE cortex_distributor_replication_factor gauge cortex_distributor_replication_factor 3 @@ -7198,7 +7219,7 @@ func TestDistributor_StorageConfigMetrics(t *testing.T) { # HELP cortex_distributor_ingest_storage_enabled Whether writes are being processed via ingest storage. Equal to 1 if ingest storage is enabled, 0 if disabled. # TYPE cortex_distributor_ingest_storage_enabled gauge cortex_distributor_ingest_storage_enabled 1 - `, "cortex_distributor_replication_factor", "cortex_distributor_ingest_storage_enabled") + `), "cortex_distributor_replication_factor", "cortex_distributor_ingest_storage_enabled")) }) t.Run("ingest storage", func(t *testing.T) { @@ -7210,11 +7231,11 @@ func TestDistributor_StorageConfigMetrics(t *testing.T) { happyIngesters: 3, replicationFactor: 3, }) - util_test.AssertGatherAndCompare(t, regs[0], ` + assert.NoError(t, testutil.GatherAndCompare(regs[0], strings.NewReader(` # HELP cortex_distributor_ingest_storage_enabled Whether writes are being processed via ingest storage. Equal to 1 if ingest storage is enabled, 0 if disabled. # TYPE cortex_distributor_ingest_storage_enabled gauge cortex_distributor_ingest_storage_enabled 1 - `, "cortex_distributor_replication_factor", "cortex_distributor_ingest_storage_enabled") + `), "cortex_distributor_replication_factor", "cortex_distributor_ingest_storage_enabled")) }) } diff --git a/pkg/frontend/v1/frontend_test.go b/pkg/frontend/v1/frontend_test.go index 4bb09c83cbe..f4c55b02e8b 100644 --- a/pkg/frontend/v1/frontend_test.go +++ b/pkg/frontend/v1/frontend_test.go @@ -40,7 +40,6 @@ import ( "github.com/grafana/mimir/pkg/querier/stats" querier_worker "github.com/grafana/mimir/pkg/querier/worker" "github.com/grafana/mimir/pkg/scheduler/queue" - util_test "github.com/grafana/mimir/pkg/util/test" ) const ( @@ -223,7 +222,10 @@ func TestFrontendMetricsCleanup(t *testing.T) { fr.cleanupInactiveUserMetrics("1") - util_test.AssertGatherAndCompare(t, reg, "", "cortex_query_frontend_queue_length") + require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` + # HELP cortex_query_frontend_queue_length Number of queries in the queue. + # TYPE cortex_query_frontend_queue_length gauge + `), "cortex_query_frontend_queue_length")) } testFrontend(t, defaultFrontendConfig(), handler, test, nil, reg) diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index 588433fbef0..2780367a3ee 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -3345,7 +3345,8 @@ func TestIngester_Push(t *testing.T) { mn := append(metricNames, testData.additionalMetrics...) // Check tracked Prometheus metrics - util_test.AssertGatherAndCompare(t, registry, testData.expectedMetrics, mn...) + err = testutil.GatherAndCompare(registry, strings.NewReader(testData.expectedMetrics), mn...) + assert.NoError(t, err) // Check anonymous usage stats. expectedTenantsCount := 0 @@ -3570,7 +3571,7 @@ func TestIngester_Push_DecreaseInactiveSeries(t *testing.T) { cortex_ingester_memory_series_removed_total{user="test-2"} 0 ` - util_test.AssertGatherAndCompare(t, registry, expectedMetrics, metricNames...) + assert.NoError(t, testutil.GatherAndCompare(registry, strings.NewReader(expectedMetrics), metricNames...)) } func BenchmarkIngesterPush(b *testing.B) { @@ -7267,7 +7268,7 @@ func TestIngesterCompactAndCloseIdleTSDB(t *testing.T) { metricsToCheck := []string{"cortex_ingester_memory_series_created_total", "cortex_ingester_memory_series_removed_total", "cortex_ingester_memory_users", "cortex_ingester_active_series", "cortex_ingester_memory_metadata", "cortex_ingester_memory_metadata_created_total", "cortex_ingester_memory_metadata_removed_total"} - util_test.AssertGatherAndCompare(t, r, ` + require.NoError(t, testutil.GatherAndCompare(r, strings.NewReader(` # HELP cortex_ingester_memory_series_created_total The total number of series that were created per user. # TYPE cortex_ingester_memory_series_created_total counter cortex_ingester_memory_series_created_total{user="1"} 1 @@ -7291,7 +7292,7 @@ func TestIngesterCompactAndCloseIdleTSDB(t *testing.T) { # HELP cortex_ingester_memory_metadata_created_total The total number of metadata that were created per user # TYPE cortex_ingester_memory_metadata_created_total counter cortex_ingester_memory_metadata_created_total{user="1"} 1 - `, metricsToCheck...) + `), metricsToCheck...)) // Wait until TSDB has been closed and removed. test.Poll(t, 20*time.Second, 0, func() interface{} { @@ -7305,22 +7306,31 @@ func TestIngesterCompactAndCloseIdleTSDB(t *testing.T) { require.Equal(t, int64(0), i.seriesCount.Load()) // Flushing removed all series from memory. // Verify that user has disappeared from metrics. - util_test.AssertGatherAndCompare(t, r, ` + require.NoError(t, testutil.GatherAndCompare(r, strings.NewReader(` + # HELP cortex_ingester_memory_series_created_total The total number of series that were created per user. + # TYPE cortex_ingester_memory_series_created_total counter + + # HELP cortex_ingester_memory_series_removed_total The total number of series that were removed per user. + # TYPE cortex_ingester_memory_series_removed_total counter + # HELP cortex_ingester_memory_users The current number of users in memory. # TYPE cortex_ingester_memory_users gauge cortex_ingester_memory_users 0 + # HELP cortex_ingester_active_series Number of currently active series per user. + # TYPE cortex_ingester_active_series gauge + # HELP cortex_ingester_memory_metadata The current number of metadata in memory. # TYPE cortex_ingester_memory_metadata gauge cortex_ingester_memory_metadata 0 - `, metricsToCheck...) + `), metricsToCheck...)) // Pushing another sample will recreate TSDB. pushSingleSampleWithMetadata(t, i) i.updateActiveSeries(time.Now()) // User is back. - util_test.AssertGatherAndCompare(t, r, ` + require.NoError(t, testutil.GatherAndCompare(r, strings.NewReader(` # HELP cortex_ingester_memory_series_created_total The total number of series that were created per user. # TYPE cortex_ingester_memory_series_created_total counter cortex_ingester_memory_series_created_total{user="1"} 1 @@ -7344,7 +7354,7 @@ func TestIngesterCompactAndCloseIdleTSDB(t *testing.T) { # HELP cortex_ingester_memory_metadata_created_total The total number of metadata that were created per user # TYPE cortex_ingester_memory_metadata_created_total counter cortex_ingester_memory_metadata_created_total{user="1"} 1 - `, metricsToCheck...) + `), metricsToCheck...)) } func verifyCompactedHead(t *testing.T, i *Ingester, expected bool) { @@ -8490,7 +8500,7 @@ func TestIngesterMetadataMetrics(t *testing.T) { "cortex_ingester_memory_metadata", } - util_test.AssertGatherAndCompare(t, reg, ` + assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` # HELP cortex_ingester_memory_metadata The current number of metadata in memory. # TYPE cortex_ingester_memory_metadata gauge cortex_ingester_memory_metadata 90 @@ -8499,11 +8509,11 @@ func TestIngesterMetadataMetrics(t *testing.T) { cortex_ingester_memory_metadata_created_total{user="1"} 30 cortex_ingester_memory_metadata_created_total{user="2"} 30 cortex_ingester_memory_metadata_created_total{user="3"} 30 - `, metricNames...) + `), metricNames...)) time.Sleep(40 * time.Millisecond) ing.purgeUserMetricsMetadata() - util_test.AssertGatherAndCompare(t, reg, ` + assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` # HELP cortex_ingester_memory_metadata The current number of metadata in memory. # TYPE cortex_ingester_memory_metadata gauge cortex_ingester_memory_metadata 0 @@ -8517,7 +8527,7 @@ func TestIngesterMetadataMetrics(t *testing.T) { cortex_ingester_memory_metadata_removed_total{user="1"} 30 cortex_ingester_memory_metadata_removed_total{user="2"} 30 cortex_ingester_memory_metadata_removed_total{user="3"} 30 - `, metricNames...) + `), metricNames...)) } @@ -8967,7 +8977,7 @@ func TestIngesterActiveSeries(t *testing.T) { ` // Check tracked Prometheus metrics - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) }, }, "should cleanup metrics when tsdb closed": { @@ -9014,10 +9024,11 @@ func TestIngesterActiveSeries(t *testing.T) { ` // Check tracked Prometheus metrics - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) // close tsdbs and check for cleanup ingester.closeAllTSDB() - util_test.AssertGatherAndCompare(t, gatherer, "", metricNames...) + expectedMetrics = "" + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) }, }, "should track custom matchers, removing when zero": { @@ -9065,7 +9076,7 @@ func TestIngesterActiveSeries(t *testing.T) { ` // Check tracked Prometheus metrics - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) // Pushing second time to have entires which are not going to be purged currentTime = time.Now() @@ -9103,12 +9114,12 @@ func TestIngesterActiveSeries(t *testing.T) { ` // Check tracked Prometheus metrics - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) // Update active series again in a further future where no series are active anymore. currentTime = currentTime.Add(ingester.cfg.ActiveSeriesMetrics.IdleTimeout) ingester.updateActiveSeries(currentTime) - util_test.AssertGatherAndCompare(t, gatherer, "", metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(""), metricNames...)) }, }, "successful push, active series disabled": { @@ -9125,7 +9136,7 @@ func TestIngesterActiveSeries(t *testing.T) { expectedMetrics := `` // Check tracked Prometheus metrics - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) // Check that no active series are returned matchers := []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "team", "a")} @@ -9331,7 +9342,7 @@ func TestIngesterActiveSeriesConfigChanges(t *testing.T) { cortex_ingester_active_native_histogram_buckets_custom_tracker{name="bool_is_true_flagbased",user="test_user"} 16 ` // Check tracked Prometheus metrics - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) // Add new runtime configs activeSeriesTenantOverride := new(TenantLimitsMock) @@ -9372,7 +9383,7 @@ func TestIngesterActiveSeriesConfigChanges(t *testing.T) { cortex_ingester_active_native_histogram_buckets_custom_tracker{name="bool_is_false_flagbased",user="other_test_user"} 16 cortex_ingester_active_native_histogram_buckets_custom_tracker{name="bool_is_true_flagbased",user="other_test_user"} 16 ` - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) // Saving time before second push to avoid purging it before exposing. currentTime = time.Now() @@ -9415,7 +9426,7 @@ func TestIngesterActiveSeriesConfigChanges(t *testing.T) { cortex_ingester_active_native_histogram_buckets_custom_tracker{name="team_a",user="test_user"} 16 cortex_ingester_active_native_histogram_buckets_custom_tracker{name="team_b",user="test_user"} 16 ` - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) }, }, "remove runtime overwrite and revert to flag based config": { @@ -9465,7 +9476,7 @@ func TestIngesterActiveSeriesConfigChanges(t *testing.T) { cortex_ingester_active_native_histogram_buckets_custom_tracker{name="team_b",user="test_user"} 16 ` // Check tracked Prometheus metrics - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) // Remove runtime configs limits := defaultLimitsTestConfig() @@ -9501,7 +9512,7 @@ func TestIngesterActiveSeriesConfigChanges(t *testing.T) { # TYPE cortex_ingester_active_series_loading gauge cortex_ingester_active_series_loading{user="test_user"} 1 ` - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) // Saving time before second push to avoid purging it before exposing. currentTime = time.Now() @@ -9544,7 +9555,7 @@ func TestIngesterActiveSeriesConfigChanges(t *testing.T) { cortex_ingester_active_native_histogram_buckets_custom_tracker{name="bool_is_false_flagbased",user="test_user"} 16 cortex_ingester_active_native_histogram_buckets_custom_tracker{name="bool_is_true_flagbased",user="test_user"} 16 ` - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) }, }, "changing runtime override should result in new metrics": { @@ -9582,7 +9593,7 @@ func TestIngesterActiveSeriesConfigChanges(t *testing.T) { cortex_ingester_active_native_histogram_buckets_custom_tracker{name="bool_is_true_flagbased",user="test_user"} 16 ` // Check tracked Prometheus metrics - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) // Change runtime configs activeSeriesTenantOverride := new(TenantLimitsMock) @@ -9604,7 +9615,7 @@ func TestIngesterActiveSeriesConfigChanges(t *testing.T) { # TYPE cortex_ingester_active_series_loading gauge cortex_ingester_active_series_loading{user="test_user"} 1 ` - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) // Saving time before second push to avoid purging it before exposing. currentTime = time.Now() @@ -9643,7 +9654,7 @@ func TestIngesterActiveSeriesConfigChanges(t *testing.T) { cortex_ingester_active_native_histogram_buckets_custom_tracker{name="team_c",user="test_user"} 16 cortex_ingester_active_native_histogram_buckets_custom_tracker{name="team_d",user="test_user"} 16 ` - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) }, }, "should cleanup loading metric at close": { @@ -9693,7 +9704,7 @@ func TestIngesterActiveSeriesConfigChanges(t *testing.T) { cortex_ingester_active_native_histogram_buckets_custom_tracker{name="team_b",user="test_user"} 16 ` // Check tracked Prometheus metrics - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) // Remove all configs limits := defaultLimitsTestConfig() @@ -9707,11 +9718,11 @@ func TestIngesterActiveSeriesConfigChanges(t *testing.T) { cortex_ingester_active_series_loading{user="test_user"} 1 cortex_ingester_active_series_loading{user="other_test_user"} 1 ` - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) ingester.closeAllTSDB() expectedMetrics = ` ` - util_test.AssertGatherAndCompare(t, gatherer, expectedMetrics, metricNames...) + require.NoError(t, testutil.GatherAndCompare(gatherer, strings.NewReader(expectedMetrics), metricNames...)) }, }, } @@ -10938,7 +10949,8 @@ func TestIngester_PushWithSampledErrors(t *testing.T) { } // Check tracked Prometheus metrics - util_test.AssertGatherAndCompare(t, registry, testData.expectedMetrics, metricNames...) + err = testutil.GatherAndCompare(registry, strings.NewReader(testData.expectedMetrics), metricNames...) + assert.NoError(t, err) }) } } diff --git a/pkg/ruler/rulestore/config.go b/pkg/ruler/rulestore/config.go index b02c2f96f19..2267baf51e8 100644 --- a/pkg/ruler/rulestore/config.go +++ b/pkg/ruler/rulestore/config.go @@ -29,8 +29,21 @@ type Config struct { bucket.Config `yaml:",inline"` Local LocalStoreConfig `yaml:"local"` + // RulerCache holds the configuration used for the ruler storage cache. + RulerCache RulerCacheConfig `yaml:"cache"` +} + +// RulerCacheConfig is configuration for the cache used by ruler storage as well as +// additional ruler storage specific configuration. +// +// NOTE: This is temporary while caching of rule groups is being tested. This will be removed +// in the future and cache.BackendConfig will be moved back to the Config struct above. +type RulerCacheConfig struct { + // RuleGroupEnabled enables caching of rule group contents + RuleGroupEnabled bool `yaml:"rule_group_enabled" category:"experimental"` + // Cache holds the configuration used for the ruler storage cache. - Cache cache.BackendConfig `yaml:"cache"` + Cache cache.BackendConfig `yaml:",inline"` } // RegisterFlags registers the backend storage config. @@ -41,9 +54,10 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.Local.RegisterFlagsWithPrefix(prefix, f) cfg.RegisterFlagsWithPrefixAndDefaultDirectory(prefix, "ruler", f) - f.StringVar(&cfg.Cache.Backend, prefix+"cache.backend", "", fmt.Sprintf("Backend for ruler storage cache, if not empty. The cache is supported for any storage backend except %q. Supported values: %s.", BackendLocal, strings.Join(supportedCacheBackends, ", "))) - cfg.Cache.Memcached.RegisterFlagsWithPrefix(prefix+"cache.memcached.", f) - cfg.Cache.Redis.RegisterFlagsWithPrefix(prefix+"cache.redis.", f) + f.BoolVar(&cfg.RulerCache.RuleGroupEnabled, prefix+"cache.rule-group-enabled", false, "Enabling caching of rule group contents if a cache backend is configured.") + f.StringVar(&cfg.RulerCache.Cache.Backend, prefix+"cache.backend", "", fmt.Sprintf("Backend for ruler storage cache, if not empty. The cache is supported for any storage backend except %q. Supported values: %s.", BackendLocal, strings.Join(supportedCacheBackends, ", "))) + cfg.RulerCache.Cache.Memcached.RegisterFlagsWithPrefix(prefix+"cache.memcached.", f) + cfg.RulerCache.Cache.Redis.RegisterFlagsWithPrefix(prefix+"cache.redis.", f) } func (cfg *Config) Validate() error { @@ -51,7 +65,7 @@ func (cfg *Config) Validate() error { return err } - return cfg.Cache.Validate() + return cfg.RulerCache.Cache.Validate() } // IsDefaults returns true if the storage options have not been set. diff --git a/pkg/ruler/storage.go b/pkg/ruler/storage.go index 13dc1a32bed..74140a2b1d0 100644 --- a/pkg/ruler/storage.go +++ b/pkg/ruler/storage.go @@ -60,7 +60,7 @@ func wrapBucketWithCache(bkt objstore.Bucket, cfg rulestore.Config, cacheTTL tim cacheCfg := bucketcache.NewCachingBucketConfig() - cacheClient, err := cache.CreateClient("ruler-storage-cache", cfg.Cache, logger, prometheus.WrapRegistererWithPrefix("thanos_", reg)) + cacheClient, err := cache.CreateClient("ruler-storage-cache", cfg.RulerCache.Cache, logger, prometheus.WrapRegistererWithPrefix("thanos_", reg)) if err != nil { return nil, errors.Wrapf(err, "ruler-storage-cache") } @@ -77,6 +77,12 @@ func wrapBucketWithCache(bkt objstore.Bucket, cfg rulestore.Config, cacheTTL tim codec := bucketcache.SnappyIterCodec{IterCodec: bucketcache.JSONIterCodec{}} cacheCfg.CacheIter("iter", cacheClient, isNotTenantsDir, cacheTTL, codec) + // Only cache the contents of rule groups if enabled. This is an experimental feature and we need to be able + // to disable it. Once this feature is validated, it will be enabled unconditionally. + if cfg.RulerCache.RuleGroupEnabled { + cacheCfg.CacheGet("rule-group", cacheClient, isRuleGroup, maxItemSize(cfg.RulerCache.Cache), cacheTTL, cacheTTL, cacheTTL) + } + return bucketcache.NewCachingBucket("ruler", bkt, cacheCfg, logger, reg) } @@ -87,3 +93,14 @@ func isNotTenantsDir(name string) bool { func isRuleGroup(name string) bool { return strings.HasPrefix(name, "rules/") } + +func maxItemSize(cfg cache.BackendConfig) int { + switch cfg.Backend { + case cache.BackendMemcached: + return cfg.Memcached.MaxItemSize + case cache.BackendRedis: + return cfg.Redis.MaxItemSize + default: + return 0 + } +} diff --git a/pkg/storage/ingest/partition_offset_client_test.go b/pkg/storage/ingest/partition_offset_client_test.go index eddab581fb6..1017f4cdd79 100644 --- a/pkg/storage/ingest/partition_offset_client_test.go +++ b/pkg/storage/ingest/partition_offset_client_test.go @@ -20,7 +20,6 @@ import ( "github.com/twmb/franz-go/pkg/kmsg" "go.uber.org/atomic" - "github.com/grafana/mimir/pkg/util/test" "github.com/grafana/mimir/pkg/util/testkafka" ) @@ -410,9 +409,9 @@ func TestPartitionOffsetClient_FetchPartitionsLastProducedOffsets(t *testing.T) require.NoError(t, err) assert.Empty(t, offsets) - test.AssertGatherAndCompare(t, reg, "", + assert.NoError(t, promtest.GatherAndCompare(reg, strings.NewReader(""), "cortex_ingest_storage_reader_last_produced_offset_requests_total", - "cortex_ingest_storage_reader_last_produced_offset_failures_total") + "cortex_ingest_storage_reader_last_produced_offset_failures_total")) }) t.Run("should honor context deadline and not fail other in-flight requests issued while the canceled one was still running", func(t *testing.T) { diff --git a/pkg/streamingpromql/benchmarks/benchmarks.go b/pkg/streamingpromql/benchmarks/benchmarks.go index f869dd5e836..96a5022aca2 100644 --- a/pkg/streamingpromql/benchmarks/benchmarks.go +++ b/pkg/streamingpromql/benchmarks/benchmarks.go @@ -183,6 +183,9 @@ func TestCases(metricSizes []int) []BenchCase { { Expr: "abs(a_X)", }, + { + Expr: "clamp(a_X, 100, 1000)", + }, { Expr: "label_replace(a_X, 'l2', '$1', 'l', '(.*)')", }, diff --git a/pkg/streamingpromql/compat/fallback_engine_test.go b/pkg/streamingpromql/compat/fallback_engine_test.go index c00adb4795d..91c4cfb85a4 100644 --- a/pkg/streamingpromql/compat/fallback_engine_test.go +++ b/pkg/streamingpromql/compat/fallback_engine_test.go @@ -5,18 +5,18 @@ package compat import ( "context" "errors" + "strings" "testing" "time" "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" + promtest "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/stats" "github.com/stretchr/testify/require" - - util_test "github.com/grafana/mimir/pkg/util/test" ) func TestEngineWithFallback(t *testing.T) { @@ -45,11 +45,11 @@ func TestEngineWithFallback(t *testing.T) { require.Equal(t, preferredEngine.query, query, "should return query from preferred engine") require.False(t, fallbackEngine.wasCalled, "should not call fallback engine if expression is supported by preferred engine") - util_test.AssertGatherAndCompare(t, reg, ` + require.NoError(t, promtest.GatherAndCompare(reg, strings.NewReader(` # HELP cortex_mimir_query_engine_supported_queries_total Total number of queries that were supported by the Mimir query engine. # TYPE cortex_mimir_query_engine_supported_queries_total counter cortex_mimir_query_engine_supported_queries_total 1 - `, "cortex_mimir_query_engine_supported_queries_total", "cortex_mimir_query_engine_unsupported_queries_total") + `), "cortex_mimir_query_engine_supported_queries_total", "cortex_mimir_query_engine_unsupported_queries_total")) }) t.Run("should fall back for unsupported expressions", func(t *testing.T) { @@ -63,14 +63,14 @@ func TestEngineWithFallback(t *testing.T) { require.NoError(t, err) require.Equal(t, fallbackEngine.query, query, "should return query from fallback engine if expression is not supported by preferred engine") - util_test.AssertGatherAndCompare(t, reg, ` + require.NoError(t, promtest.GatherAndCompare(reg, strings.NewReader(` # HELP cortex_mimir_query_engine_supported_queries_total Total number of queries that were supported by the Mimir query engine. # TYPE cortex_mimir_query_engine_supported_queries_total counter cortex_mimir_query_engine_supported_queries_total 0 # HELP cortex_mimir_query_engine_unsupported_queries_total Total number of queries that were not supported by the Mimir query engine and so fell back to Prometheus' engine. # TYPE cortex_mimir_query_engine_unsupported_queries_total counter cortex_mimir_query_engine_unsupported_queries_total{reason="this expression is not supported"} 1 - `, "cortex_mimir_query_engine_supported_queries_total", "cortex_mimir_query_engine_unsupported_queries_total") + `), "cortex_mimir_query_engine_supported_queries_total", "cortex_mimir_query_engine_unsupported_queries_total")) }) t.Run("should not fall back if creating query fails for another reason", func(t *testing.T) { @@ -96,14 +96,14 @@ func TestEngineWithFallback(t *testing.T) { require.NoError(t, err) require.Equal(t, fallbackEngine.query, query, "should return query from fallback engine if expression is not supported by preferred engine") - util_test.AssertGatherAndCompare(t, reg, ` + require.NoError(t, promtest.GatherAndCompare(reg, strings.NewReader(` # HELP cortex_mimir_query_engine_supported_queries_total Total number of queries that were supported by the Mimir query engine. # TYPE cortex_mimir_query_engine_supported_queries_total counter cortex_mimir_query_engine_supported_queries_total 0 # HELP cortex_mimir_query_engine_unsupported_queries_total Total number of queries that were not supported by the Mimir query engine and so fell back to Prometheus' engine. # TYPE cortex_mimir_query_engine_unsupported_queries_total counter cortex_mimir_query_engine_unsupported_queries_total{reason="fallback forced by HTTP header"} 1 - `, "cortex_mimir_query_engine_supported_queries_total", "cortex_mimir_query_engine_unsupported_queries_total") + `), "cortex_mimir_query_engine_supported_queries_total", "cortex_mimir_query_engine_unsupported_queries_total")) }) }) } diff --git a/pkg/streamingpromql/functions.go b/pkg/streamingpromql/functions.go index 1a7cdd38f45..b854ba4d894 100644 --- a/pkg/streamingpromql/functions.go +++ b/pkg/streamingpromql/functions.go @@ -49,7 +49,7 @@ func SingleInputVectorFunctionOperatorFactory(name string, f functions.FunctionO return nil, fmt.Errorf("expected an instant vector argument for %s, got %T", name, args[0]) } - var o types.InstantVectorOperator = operators.NewFunctionOverInstantVector(inner, memoryConsumptionTracker, f, expressionPosition) + var o types.InstantVectorOperator = operators.NewFunctionOverInstantVector(inner, nil, memoryConsumptionTracker, f, expressionPosition) if f.SeriesMetadataFunction.NeedsSeriesDeduplication { o = operators.NewDeduplicateAndMerge(o, memoryConsumptionTracker) @@ -148,7 +148,7 @@ func LabelReplaceFunctionOperatorFactory() InstantVectorFunctionOperatorFactory inner, ok := args[0].(types.InstantVectorOperator) if !ok { // Should be caught by the PromQL parser, but we check here for safety. - return nil, fmt.Errorf("expected a range vector for 1st argument for label_replace, got %T", args[0]) + return nil, fmt.Errorf("expected an instant vector for 1st argument for label_replace, got %T", args[0]) } dstLabel, ok := args[1].(types.StringOperator) @@ -183,12 +183,46 @@ func LabelReplaceFunctionOperatorFactory() InstantVectorFunctionOperatorFactory }, } - o := operators.NewFunctionOverInstantVector(inner, memoryConsumptionTracker, f, expressionPosition) + o := operators.NewFunctionOverInstantVector(inner, nil, memoryConsumptionTracker, f, expressionPosition) return operators.NewDeduplicateAndMerge(o, memoryConsumptionTracker), nil } } +func ClampFunctionOperatorFactory() InstantVectorFunctionOperatorFactory { + return func(args []types.Operator, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, _ *annotations.Annotations, expressionPosition posrange.PositionRange) (types.InstantVectorOperator, error) { + if len(args) != 3 { + // Should be caught by the PromQL parser, but we check here for safety. + return nil, fmt.Errorf("expected exactly 3 argument for clamp, got %v", len(args)) + } + + inner, ok := args[0].(types.InstantVectorOperator) + if !ok { + // Should be caught by the PromQL parser, but we check here for safety. + return nil, fmt.Errorf("expected an instant vector for 1st argument for clamp, got %T", args[0]) + } + + min, ok := args[1].(types.ScalarOperator) + if !ok { + // Should be caught by the PromQL parser, but we check here for safety. + return nil, fmt.Errorf("expected a scalar for 2nd argument for clamp, got %T", args[1]) + } + + max, ok := args[2].(types.ScalarOperator) + if !ok { + // Should be caught by the PromQL parser, but we check here for safety. + return nil, fmt.Errorf("expected a scalar for 3rd argument for clamp, got %T", args[2]) + } + + f := functions.FunctionOverInstantVector{ + SeriesDataFunc: functions.Clamp, + SeriesMetadataFunction: functions.DropSeriesName, + } + + return operators.NewFunctionOverInstantVector(inner, []types.ScalarOperator{min, max}, memoryConsumptionTracker, f, expressionPosition), nil + } +} + // These functions return an instant-vector. var instantVectorFunctionOperatorFactories = map[string]InstantVectorFunctionOperatorFactory{ // Please keep this list sorted alphabetically. @@ -202,6 +236,7 @@ var instantVectorFunctionOperatorFactories = map[string]InstantVectorFunctionOpe "atanh": InstantVectorTransformationFunctionOperatorFactory("atanh", functions.Atanh), "avg_over_time": FunctionOverRangeVectorOperatorFactory("avg_over_time", functions.AvgOverTime), "ceil": InstantVectorTransformationFunctionOperatorFactory("ceil", functions.Ceil), + "clamp": ClampFunctionOperatorFactory(), "cos": InstantVectorTransformationFunctionOperatorFactory("cos", functions.Cos), "cosh": InstantVectorTransformationFunctionOperatorFactory("cosh", functions.Cosh), "count_over_time": FunctionOverRangeVectorOperatorFactory("count_over_time", functions.CountOverTime), @@ -286,6 +321,6 @@ func unaryNegationOfInstantVectorOperatorFactory(inner types.InstantVectorOperat SeriesMetadataFunction: functions.DropSeriesName, } - o := operators.NewFunctionOverInstantVector(inner, memoryConsumptionTracker, f, expressionPosition) + o := operators.NewFunctionOverInstantVector(inner, nil, memoryConsumptionTracker, f, expressionPosition) return operators.NewDeduplicateAndMerge(o, memoryConsumptionTracker) } diff --git a/pkg/streamingpromql/functions/common.go b/pkg/streamingpromql/functions/common.go index eddb42ab2ee..021421d614c 100644 --- a/pkg/streamingpromql/functions/common.go +++ b/pkg/streamingpromql/functions/common.go @@ -26,11 +26,11 @@ var DropSeriesName = SeriesMetadataFunctionDefinition{ } // InstantVectorSeriesFunction is a function that takes in an instant vector and produces an instant vector. -type InstantVectorSeriesFunction func(seriesData types.InstantVectorSeriesData, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) +type InstantVectorSeriesFunction func(seriesData types.InstantVectorSeriesData, scalarArgsData []types.ScalarData, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) // floatTransformationFunc is not needed elsewhere, so it is not exported yet func floatTransformationFunc(transform func(f float64) float64) InstantVectorSeriesFunction { - return func(seriesData types.InstantVectorSeriesData, _ *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { + return func(seriesData types.InstantVectorSeriesData, _ []types.ScalarData, _ *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { for i := range seriesData.Floats { seriesData.Floats[i].F = transform(seriesData.Floats[i].F) } @@ -40,16 +40,16 @@ func floatTransformationFunc(transform func(f float64) float64) InstantVectorSer func FloatTransformationDropHistogramsFunc(transform func(f float64) float64) InstantVectorSeriesFunction { ft := floatTransformationFunc(transform) - return func(seriesData types.InstantVectorSeriesData, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { + return func(seriesData types.InstantVectorSeriesData, _ []types.ScalarData, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { // Functions that do not explicitly mention native histograms in their documentation will ignore histogram samples. // https://prometheus.io/docs/prometheus/latest/querying/functions types.HPointSlicePool.Put(seriesData.Histograms, memoryConsumptionTracker) seriesData.Histograms = nil - return ft(seriesData, memoryConsumptionTracker) + return ft(seriesData, nil, memoryConsumptionTracker) } } -func PassthroughData(seriesData types.InstantVectorSeriesData, _ *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { +func PassthroughData(seriesData types.InstantVectorSeriesData, _ []types.ScalarData, _ *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { return seriesData, nil } diff --git a/pkg/streamingpromql/functions/common_test.go b/pkg/streamingpromql/functions/common_test.go index 2853aa3a135..64e51e11a5d 100644 --- a/pkg/streamingpromql/functions/common_test.go +++ b/pkg/streamingpromql/functions/common_test.go @@ -57,7 +57,7 @@ func TestFloatTransformationFunc(t *testing.T) { }, } - modifiedSeriesData, err := transformFunc(seriesData, memoryConsumptionTracker) + modifiedSeriesData, err := transformFunc(seriesData, nil, memoryConsumptionTracker) require.NoError(t, err) require.Equal(t, expected, modifiedSeriesData) require.Equal(t, types.FPointSize*2+types.HPointSize*1, memoryConsumptionTracker.CurrentEstimatedMemoryConsumptionBytes) @@ -88,7 +88,7 @@ func TestFloatTransformationDropHistogramsFunc(t *testing.T) { Histograms: nil, // Histograms should be dropped } - modifiedSeriesData, err := transformFunc(seriesData, memoryConsumptionTracker) + modifiedSeriesData, err := transformFunc(seriesData, nil, memoryConsumptionTracker) require.NoError(t, err) require.Equal(t, expected, modifiedSeriesData) // We expect the dropped histogram to be returned to the pool diff --git a/pkg/streamingpromql/functions/math.go b/pkg/streamingpromql/functions/math.go index f13a7de32b5..3a57d640e7a 100644 --- a/pkg/streamingpromql/functions/math.go +++ b/pkg/streamingpromql/functions/math.go @@ -52,7 +52,7 @@ var Sgn = FloatTransformationDropHistogramsFunc(func(f float64) float64 { return f }) -var UnaryNegation InstantVectorSeriesFunction = func(seriesData types.InstantVectorSeriesData, _ *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { +var UnaryNegation InstantVectorSeriesFunction = func(seriesData types.InstantVectorSeriesData, _ []types.ScalarData, _ *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { for i := range seriesData.Floats { seriesData.Floats[i].F = -seriesData.Floats[i].F } @@ -63,3 +63,26 @@ var UnaryNegation InstantVectorSeriesFunction = func(seriesData types.InstantVec return seriesData, nil } + +var Clamp InstantVectorSeriesFunction = func(seriesData types.InstantVectorSeriesData, scalarArgsData []types.ScalarData, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { + outputIdx := 0 + minArg := scalarArgsData[0] + maxArg := scalarArgsData[1] + for step, data := range seriesData.Floats { + minVal := minArg.Samples[step].F + maxVal := maxArg.Samples[step].F + if maxVal < minVal { + // Drop this point as there is no valid answer + continue + } + // We reuse the existing FPoint slice in place + seriesData.Floats[outputIdx].T = data.T + seriesData.Floats[outputIdx].F = max(minVal, min(maxVal, data.F)) + outputIdx++ + } + seriesData.Floats = seriesData.Floats[:outputIdx] + // Histograms are dropped from clamp + types.HPointSlicePool.Put(seriesData.Histograms, memoryConsumptionTracker) + seriesData.Histograms = nil + return seriesData, nil +} diff --git a/pkg/streamingpromql/functions/native_histograms.go b/pkg/streamingpromql/functions/native_histograms.go index e4829a3255e..5be03404e1e 100644 --- a/pkg/streamingpromql/functions/native_histograms.go +++ b/pkg/streamingpromql/functions/native_histograms.go @@ -13,7 +13,7 @@ import ( "github.com/grafana/mimir/pkg/streamingpromql/types" ) -func HistogramCount(seriesData types.InstantVectorSeriesData, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { +func HistogramCount(seriesData types.InstantVectorSeriesData, _ []types.ScalarData, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { floats, err := types.FPointSlicePool.Get(len(seriesData.Histograms), memoryConsumptionTracker) if err != nil { return types.InstantVectorSeriesData{}, err @@ -35,7 +35,7 @@ func HistogramCount(seriesData types.InstantVectorSeriesData, memoryConsumptionT return data, nil } -func HistogramSum(seriesData types.InstantVectorSeriesData, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { +func HistogramSum(seriesData types.InstantVectorSeriesData, _ []types.ScalarData, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { floats, err := types.FPointSlicePool.Get(len(seriesData.Histograms), memoryConsumptionTracker) if err != nil { return types.InstantVectorSeriesData{}, err diff --git a/pkg/streamingpromql/operators/function_over_instant_vector.go b/pkg/streamingpromql/operators/function_over_instant_vector.go index 915cf237126..afe6e39094c 100644 --- a/pkg/streamingpromql/operators/function_over_instant_vector.go +++ b/pkg/streamingpromql/operators/function_over_instant_vector.go @@ -21,10 +21,17 @@ type FunctionOverInstantVector struct { // At the moment no instant-vector promql function takes more than one instant-vector // as an argument. We can assume this will always be the Inner operator and therefore // what we use for the SeriesMetadata. - Inner types.InstantVectorOperator + Inner types.InstantVectorOperator + // Any scalar arguments will be read once and passed to Func.SeriesDataFunc. + ScalarArgs []types.ScalarOperator MemoryConsumptionTracker *limiting.MemoryConsumptionTracker Func functions.FunctionOverInstantVector + // scalarArgsData stores the processed ScalarArgs during SeriesMetadata. + // The order of scalarArgsData matches the order of ScalarArgs. + // These are returned the pool at Close(). + scalarArgsData []types.ScalarData + expressionPosition posrange.PositionRange } @@ -32,12 +39,14 @@ var _ types.InstantVectorOperator = &FunctionOverInstantVector{} func NewFunctionOverInstantVector( inner types.InstantVectorOperator, + scalarArgs []types.ScalarOperator, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, f functions.FunctionOverInstantVector, expressionPosition posrange.PositionRange, ) *FunctionOverInstantVector { return &FunctionOverInstantVector{ Inner: inner, + ScalarArgs: scalarArgs, MemoryConsumptionTracker: memoryConsumptionTracker, Func: f, @@ -49,7 +58,30 @@ func (m *FunctionOverInstantVector) ExpressionPosition() posrange.PositionRange return m.expressionPosition } +func (m *FunctionOverInstantVector) processScalarArgs(ctx context.Context) error { + if len(m.ScalarArgs) == 0 { + return nil + } + + m.scalarArgsData = make([]types.ScalarData, 0, len(m.ScalarArgs)) + for _, so := range m.ScalarArgs { + sd, err := so.GetValues(ctx) + if err != nil { + return err + } + m.scalarArgsData = append(m.scalarArgsData, sd) + } + + return nil +} + func (m *FunctionOverInstantVector) SeriesMetadata(ctx context.Context) ([]types.SeriesMetadata, error) { + // Pre-process any Scalar arguments + err := m.processScalarArgs(ctx) + if err != nil { + return nil, err + } + metadata, err := m.Inner.SeriesMetadata(ctx) if err != nil { return nil, err @@ -68,9 +100,12 @@ func (m *FunctionOverInstantVector) NextSeries(ctx context.Context) (types.Insta return types.InstantVectorSeriesData{}, err } - return m.Func.SeriesDataFunc(series, m.MemoryConsumptionTracker) + return m.Func.SeriesDataFunc(series, m.scalarArgsData, m.MemoryConsumptionTracker) } func (m *FunctionOverInstantVector) Close() { m.Inner.Close() + for _, sd := range m.scalarArgsData { + types.FPointSlicePool.Put(sd.Samples, m.MemoryConsumptionTracker) + } } diff --git a/pkg/streamingpromql/operators/function_over_instant_vector_test.go b/pkg/streamingpromql/operators/function_over_instant_vector_test.go index ec143cc6809..d4a3e365101 100644 --- a/pkg/streamingpromql/operators/function_over_instant_vector_test.go +++ b/pkg/streamingpromql/operators/function_over_instant_vector_test.go @@ -8,6 +8,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/stretchr/testify/require" "github.com/grafana/mimir/pkg/streamingpromql/functions" @@ -38,7 +39,7 @@ func TestFunctionOverInstantVector(t *testing.T) { } seriesDataFuncCalledTimes := 0 - mustBeCalledSeriesData := func(types.InstantVectorSeriesData, *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { + mustBeCalledSeriesData := func(types.InstantVectorSeriesData, []types.ScalarData, *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { seriesDataFuncCalledTimes++ return types.InstantVectorSeriesData{}, nil } @@ -62,3 +63,68 @@ func TestFunctionOverInstantVector(t *testing.T) { require.True(t, metadataFuncCalled, "Supplied MetadataFunc must be called matching the signature") require.Equal(t, len(inner.data), seriesDataFuncCalledTimes, "Supplied SeriesDataFunc was called once for each Series") } + +func TestFunctionOverInstantVectorWithScalarArgs(t *testing.T) { + inner := &testOperator{ + series: []labels.Labels{ + labels.FromStrings("series", "0"), + labels.FromStrings("series", "1"), + }, + data: []types.InstantVectorSeriesData{ + {Floats: []promql.FPoint{{T: 0, F: 1}}}, + {Floats: []promql.FPoint{{T: 0, F: 2}}}, + }, + } + + scalarOperator1 := &testScalarOperator{ + value: types.ScalarData{Samples: []promql.FPoint{{T: 0, F: 3}}}, + } + + scalarOperator2 := &testScalarOperator{ + value: types.ScalarData{Samples: []promql.FPoint{{T: 60, F: 4}}}, + } + + seriesDataFuncCalledTimes := 0 + mustBeCalledSeriesData := func(_ types.InstantVectorSeriesData, scalarArgs []types.ScalarData, _ *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) { + seriesDataFuncCalledTimes++ + // Verify that the scalar arguments are correctly passed and in the order we expect + require.Equal(t, 2, len(scalarArgs)) + require.Equal(t, types.ScalarData{Samples: []promql.FPoint{{T: 0, F: 3}}}, scalarArgs[0]) + require.Equal(t, types.ScalarData{Samples: []promql.FPoint{{T: 60, F: 4}}}, scalarArgs[1]) + return types.InstantVectorSeriesData{}, nil + } + + operator := &FunctionOverInstantVector{ + Inner: inner, + ScalarArgs: []types.ScalarOperator{scalarOperator1, scalarOperator2}, + MemoryConsumptionTracker: limiting.NewMemoryConsumptionTracker(0, nil), + Func: functions.FunctionOverInstantVector{ + SeriesDataFunc: mustBeCalledSeriesData, + SeriesMetadataFunction: functions.DropSeriesName, + }, + } + + ctx := context.TODO() + // SeriesMetadata should process scalar args + _, err := operator.SeriesMetadata(ctx) + require.NoError(t, err) + + // NextSeries should pass scalarArgsData to SeriesDataFunc, which validates the arguments + _, err = operator.NextSeries(ctx) + require.NoError(t, err) + require.Equal(t, len(inner.data), seriesDataFuncCalledTimes, "Supplied SeriesDataFunc was called once for each Series") +} + +type testScalarOperator struct { + value types.ScalarData +} + +func (t *testScalarOperator) GetValues(_ context.Context) (types.ScalarData, error) { + return t.value, nil +} + +func (t *testScalarOperator) ExpressionPosition() posrange.PositionRange { + return posrange.PositionRange{} +} + +func (t *testScalarOperator) Close() {} diff --git a/pkg/streamingpromql/testdata/ours-only/native_histograms.test b/pkg/streamingpromql/testdata/ours-only/native_histograms.test new file mode 100644 index 00000000000..b156535840d --- /dev/null +++ b/pkg/streamingpromql/testdata/ours-only/native_histograms.test @@ -0,0 +1,9 @@ +# Test metric with mixed floats and histograms +load 1m + mixed_metric {{schema:0 sum:5 count:4 buckets:[1 2 1]}} 1 2 3 {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:8 count:6 buckets:[1 4 1]}} + +# clamp ignores any histograms +# Prometheus currently returns min instead of no value as per the documentation +# https://github.com/prometheus/prometheus/pull/15169 +eval range from 0 to 5m step 1m clamp(mixed_metric, 2, 5) + {} _ 2 2 3 diff --git a/pkg/streamingpromql/testdata/ours/functions.test b/pkg/streamingpromql/testdata/ours/functions.test index 995c19c3b3e..f80123e35db 100644 --- a/pkg/streamingpromql/testdata/ours/functions.test +++ b/pkg/streamingpromql/testdata/ours/functions.test @@ -404,3 +404,13 @@ eval_fail instant at 5m tan({__name__=~"float_metric_.*"}) eval_fail instant at 5m tanh({__name__=~"float_metric_.*"}) expected_fail_message vector cannot contain metrics with the same labelset + +clear + +load 5m + mins 0 10 0 30 0 -10 0 5 NaN 0 NaN NaN + maxes 10 20 5 0 -10 -5 40 50 10 NaN NaN NaN + series -10 0 10 20 30 40 50 NaN 60 70 80 NaN + +eval range from 0 to 60m step 5m clamp(series, scalar(mins), scalar(maxes)) + {} 0 10 5 _ _ -5 40 NaN NaN NaN NaN NaN NaN diff --git a/pkg/streamingpromql/testdata/ours/native_histograms.test b/pkg/streamingpromql/testdata/ours/native_histograms.test index ad9373fe7ad..614dbfe1477 100644 --- a/pkg/streamingpromql/testdata/ours/native_histograms.test +++ b/pkg/streamingpromql/testdata/ours/native_histograms.test @@ -37,10 +37,10 @@ eval instant at 4m histogram_sum(mixed_metric) {} 8 eval range from 0 to 5m step 1m histogram_count(mixed_metric) -{} _ _ _ 4 6 6 + {} _ _ _ 4 6 6 eval range from 0 to 5m step 1m histogram_sum(mixed_metric) -{} _ _ _ 5 8 8 + {} _ _ _ 5 8 8 # histogram_count ignores any float values eval instant at 2m histogram_count(mixed_metric) diff --git a/pkg/streamingpromql/testdata/upstream/functions.test b/pkg/streamingpromql/testdata/upstream/functions.test index c09c569bcc4..c8ec9a2e6d0 100644 --- a/pkg/streamingpromql/testdata/upstream/functions.test +++ b/pkg/streamingpromql/testdata/upstream/functions.test @@ -460,11 +460,10 @@ load 5m # {src="clamp-b"} 0 # {src="clamp-c"} 100 -# Unsupported by streaming engine. -# eval instant at 0m clamp(test_clamp, -25, 75) -# {src="clamp-a"} -25 -# {src="clamp-b"} 0 -# {src="clamp-c"} 75 +eval instant at 0m clamp(test_clamp, -25, 75) + {src="clamp-a"} -25 + {src="clamp-b"} 0 + {src="clamp-c"} 75 # Unsupported by streaming engine. # eval instant at 0m clamp_max(clamp_min(test_clamp, -20), 70) @@ -478,20 +477,17 @@ load 5m # {src="clamp-b"} 0 # {src="clamp-c"} 70 -# Unsupported by streaming engine. -# eval instant at 0m clamp(test_clamp, 0, NaN) -# {src="clamp-a"} NaN -# {src="clamp-b"} NaN -# {src="clamp-c"} NaN +eval instant at 0m clamp(test_clamp, 0, NaN) + {src="clamp-a"} NaN + {src="clamp-b"} NaN + {src="clamp-c"} NaN -# Unsupported by streaming engine. -# eval instant at 0m clamp(test_clamp, NaN, 0) -# {src="clamp-a"} NaN -# {src="clamp-b"} NaN -# {src="clamp-c"} NaN +eval instant at 0m clamp(test_clamp, NaN, 0) + {src="clamp-a"} NaN + {src="clamp-b"} NaN + {src="clamp-c"} NaN -# Unsupported by streaming engine. -# eval instant at 0m clamp(test_clamp, 5, -5) +eval instant at 0m clamp(test_clamp, 5, -5) # Test cases for sgn. clear diff --git a/pkg/streamingpromql/testdata/upstream/name_label_dropping.test b/pkg/streamingpromql/testdata/upstream/name_label_dropping.test index 8dd2b86f045..b8306dad326 100644 --- a/pkg/streamingpromql/testdata/upstream/name_label_dropping.test +++ b/pkg/streamingpromql/testdata/upstream/name_label_dropping.test @@ -33,9 +33,8 @@ eval instant at 15m metric * 2 {env="1"} 240 # Drops __name__ for instant-vector functions -# Unsupported by streaming engine. -# eval instant at 15m clamp(metric, 0, 100) -# {env="1"} 100 +eval instant at 15m clamp(metric, 0, 100) + {env="1"} 100 # Drops __name__ for range-vector functions eval instant at 15m rate(metric{env="1"}[10m]) diff --git a/pkg/util/test/metrics.go b/pkg/util/test/metrics.go index 78a43f83504..6c7c38aedd4 100644 --- a/pkg/util/test/metrics.go +++ b/pkg/util/test/metrics.go @@ -3,15 +3,9 @@ package test import ( - "bufio" "fmt" "io" "strings" - "testing" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/testutil" - "github.com/stretchr/testify/assert" ) type ExpectedMetricsContext struct { @@ -66,63 +60,3 @@ func (m *ExpectedMetrics) GetOutput() io.Reader { func (m *ExpectedMetrics) GetNames() []string { return m.Names } - -// AssertGatherAndCompare asserts that metrics in expectedText are found among g's metrics. -// If, however, any metrics are provided, the following rules apply: -// * Provided metrics that also exist in expectedText are required to be found among g's metrics. -// * Provided metrics that don't exist in expectedText are required to be absent from g's metrics. -func AssertGatherAndCompare(t *testing.T, g prometheus.Gatherer, expectedText string, metrics ...string) { - t.Helper() - assert.NoError(t, gatherAndCompare(g, expectedText, metrics...)) -} - -func gatherAndCompare(g prometheus.Gatherer, expectedText string, metrics ...string) error { - if len(metrics) == 0 { - return testutil.GatherAndCompare(g, strings.NewReader(expectedText)) - } - - sc := bufio.NewScanner(strings.NewReader(expectedText)) - absent := make([]string, len(metrics)) - copy(absent, metrics) - required := make([]string, 0, len(metrics)) - for sc.Scan() { - line := strings.TrimSpace(sc.Text()) - if len(line) == 0 || strings.HasPrefix(line, "#") { - continue - } - for i, metric := range absent { - if strings.HasPrefix(line, metric) && (line[len(metric)] == ' ' || line[len(metric)] == '{') { - absent = append(absent[:i], absent[i+1:]...) - required = append(required, metric) - break - } - } - } - // Sanity check. - if len(required)+len(absent) != len(metrics) { - panic(fmt.Errorf("length of required+absent doesn't match up with metrics")) - } - - if len(required) > 0 { - if err := testutil.GatherAndCompare(g, strings.NewReader(expectedText), required...); err != nil { - return err - } - } - - notAbsent := []string{} - for _, metric := range absent { - count, err := testutil.GatherAndCount(g, metric) - if err != nil { - return fmt.Errorf("GatherAndCount(g, %s): %w", metric, err) - } - - if count > 0 { - notAbsent = append(notAbsent, metric) - } - } - if len(notAbsent) > 0 { - return fmt.Errorf("should be absent: metrics=%s", strings.Join(notAbsent, ", ")) - } - - return nil -} diff --git a/pkg/util/test/metrics_test.go b/pkg/util/test/metrics_test.go deleted file mode 100644 index 43978aa6784..00000000000 --- a/pkg/util/test/metrics_test.go +++ /dev/null @@ -1,126 +0,0 @@ -// SPDX-License-Identifier: AGPL-3.0-only - -package test - -import ( - "testing" - - dto "github.com/prometheus/client_model/go" - "github.com/stretchr/testify/require" - "google.golang.org/protobuf/proto" -) - -func TestAssertGatherAndCompare(t *testing.T) { - g := fakeGatherer{ - metrics: []*dto.MetricFamily{ - { - Name: proto.String("cortex_distributor_deduped_samples_total"), - Help: proto.String("The total number of deduplicated samples."), - Metric: []*dto.Metric{ - { - Label: []*dto.LabelPair{ - { - Name: proto.String("environment"), - Value: proto.String("test"), - }, - }, - TimestampMs: proto.Int64(1000), - Counter: &dto.Counter{ - Value: proto.Float64(1), - }, - }, - }, - }, - }, - } - - t.Run("don't specify any metrics", func(t *testing.T) { - // When not specifying any metrics, an error should be returned due to missing metric - // cortex_distributor_latest_seen_sample_timestamp_seconds. - err := gatherAndCompare(g, ` - # HELP cortex_distributor_deduped_samples_total The total number of deduplicated samples. - # TYPE cortex_distributor_deduped_samples_total counter - cortex_distributor_deduped_samples_total{environment="test"} 1 1000 - - # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. - # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge - cortex_distributor_latest_seen_sample_timestamp_seconds{user="userA"} 1111 - `) - require.EqualError(t, err, ` # HELP cortex_distributor_deduped_samples_total The total number of deduplicated samples. - # TYPE cortex_distributor_deduped_samples_total counter - cortex_distributor_deduped_samples_total{environment="test"} 1 1000 -+# HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. -+# TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge -+cortex_distributor_latest_seen_sample_timestamp_seconds{user="userA"} 1111 - `) - }) - - t.Run("specify required metric", func(t *testing.T) { - // When specifying that cortex_distributor_deduped_samples_total as the one required metric, - // cortex_distributor_latest_seen_sample_timestamp_seconds should be ignored even if it's missing. - AssertGatherAndCompare(t, g, ` - # HELP cortex_distributor_deduped_samples_total The total number of deduplicated samples. - # TYPE cortex_distributor_deduped_samples_total counter - cortex_distributor_deduped_samples_total{environment="test"} 1 1000 - - # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. - # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge - cortex_distributor_latest_seen_sample_timestamp_seconds{user="userA"} 1111 - `, "cortex_distributor_deduped_samples_total") - }) - - t.Run("specify required metric which isn't there", func(t *testing.T) { - // When specifying that cortex_distributor_deduped_samples_total as the one required metric, - // cortex_distributor_latest_seen_sample_timestamp_seconds should be ignored even if it's missing. - err := gatherAndCompare(g, ` - # HELP cortex_distributor_deduped_samples_total The total number of deduplicated samples. - # TYPE cortex_distributor_deduped_samples_total counter - cortex_distributor_deduped_samples_total{environment="test"} 1 1000 - - # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. - # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge - cortex_distributor_latest_seen_sample_timestamp_seconds{user="userA"} 1111 - `, "cortex_distributor_latest_seen_sample_timestamp_seconds") - require.EqualError(t, err, "expected metric name(s) not found: [cortex_distributor_latest_seen_sample_timestamp_seconds]") - }) - - t.Run("specify required metric and absent metric", func(t *testing.T) { - // Verify that cortex_distributor_deduped_samples_total is found among metrics returned by g, - // and that conversely, cortex_distributor_non_ha_samples_received_total is not found among - // metrics returned by g. - // cortex_distributor_latest_seen_sample_timestamp_seconds is ignored, since it's not among - // the specified metrics. - AssertGatherAndCompare(t, g, ` - # HELP cortex_distributor_deduped_samples_total The total number of deduplicated samples. - # TYPE cortex_distributor_deduped_samples_total counter - cortex_distributor_deduped_samples_total{environment="test"} 1 1000 - - # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. - # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge - cortex_distributor_latest_seen_sample_timestamp_seconds{user="userA"} 1111 - `, "cortex_distributor_deduped_samples_total", "cortex_distributor_non_ha_samples_received_total") - }) - - t.Run("specify absent metric which is actually there", func(t *testing.T) { - // Verify that cortex_distributor_deduped_samples_total is found among metrics returned by g, - // and that conversely, cortex_distributor_non_ha_samples_received_total is not found among - // metrics returned by g. - // cortex_distributor_latest_seen_sample_timestamp_seconds is ignored, since it's not among - // the specified metrics. - err := gatherAndCompare(g, ` - # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. - # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge - cortex_distributor_latest_seen_sample_timestamp_seconds{user="userA"} 1111 - `, "cortex_distributor_deduped_samples_total", "cortex_distributor_non_ha_samples_received_total") - require.EqualError(t, err, "should be absent: metrics=cortex_distributor_deduped_samples_total") - }) -} - -type fakeGatherer struct { - metrics []*dto.MetricFamily - err error -} - -func (g fakeGatherer) Gather() ([]*dto.MetricFamily, error) { - return g.metrics, g.err -} diff --git a/tools/querytee/proxy_endpoint.go b/tools/querytee/proxy_endpoint.go index f49f0374c96..796a8475ead 100644 --- a/tools/querytee/proxy_endpoint.go +++ b/tools/querytee/proxy_endpoint.go @@ -259,7 +259,7 @@ func (p *ProxyEndpoint) executeBackendRequests(req *http.Request, backends []Pro expectedResponse, actualResponse = actualResponse, expectedResponse } - result, err := p.compareResponses(expectedResponse, actualResponse, evaluationTime) + result, err := p.compareResponses(ctx, expectedResponse, actualResponse, evaluationTime) if result == ComparisonFailed { level.Error(logger).Log( "msg", "response comparison failed", @@ -315,7 +315,7 @@ func (p *ProxyEndpoint) waitBackendResponseForDownstream(resCh chan *backendResp return firstResponse } -func (p *ProxyEndpoint) compareResponses(expectedResponse, actualResponse *backendResponse, queryEvaluationTime time.Time) (ComparisonResult, error) { +func (p *ProxyEndpoint) compareResponses(ctx context.Context, expectedResponse, actualResponse *backendResponse, queryEvaluationTime time.Time) (ComparisonResult, error) { if expectedResponse.err != nil { return ComparisonFailed, fmt.Errorf("skipped comparison of response because the request to the preferred backend failed: %w", expectedResponse.err) } @@ -324,6 +324,10 @@ func (p *ProxyEndpoint) compareResponses(expectedResponse, actualResponse *backe return ComparisonFailed, fmt.Errorf("skipped comparison of response because the request to the secondary backend failed: %w", actualResponse.err) } + if ctx.Err() != nil { + return ComparisonSkipped, fmt.Errorf("skipped comparison of response because the incoming request to query-tee was cancelled: %w", context.Cause(ctx)) + } + if expectedResponse.status != actualResponse.status { return ComparisonFailed, fmt.Errorf("expected status code %d (returned by preferred backend) but got %d from secondary backend", expectedResponse.status, actualResponse.status) } diff --git a/tools/querytee/proxy_endpoint_test.go b/tools/querytee/proxy_endpoint_test.go index 2658f171d51..109b7306a62 100644 --- a/tools/querytee/proxy_endpoint_test.go +++ b/tools/querytee/proxy_endpoint_test.go @@ -363,6 +363,57 @@ func Test_ProxyEndpoint_Comparison(t *testing.T) { }) } } +func Test_ProxyEndpoint_ComparisonWithRequestCancellation(t *testing.T) { + testRoute := Route{RouteName: "test"} + ctx, cancel := context.WithCancelCause(context.Background()) + + preferredBackend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + cancel(errors.New("something cancelled this request")) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, err := w.Write([]byte("preferred response")) + require.NoError(t, err) + })) + + defer preferredBackend.Close() + preferredBackendURL, err := url.Parse(preferredBackend.URL) + require.NoError(t, err) + + secondaryBackend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + <-ctx.Done() // Wait until the incoming request has been cancelled. + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusTeapot) + _, err := w.Write([]byte("secondary response")) + require.NoError(t, err) + })) + + defer secondaryBackend.Close() + secondaryBackendURL, err := url.Parse(secondaryBackend.URL) + require.NoError(t, err) + + backends := []ProxyBackendInterface{ + NewProxyBackend("preferred-backend", preferredBackendURL, time.Second, true, false), + NewProxyBackend("secondary-backend", secondaryBackendURL, time.Second, false, false), + } + + logger := newMockLogger() + reg := prometheus.NewPedanticRegistry() + comparator := NewSamplesComparator(SampleComparisonOptions{}) + endpoint := NewProxyEndpoint(backends, testRoute, NewProxyMetrics(reg), logger, comparator, 0, 1.0) + + resp := httptest.NewRecorder() + req, err := http.NewRequestWithContext(ctx, "GET", "http://test/api/v1/test", nil) + require.NoError(t, err) + endpoint.ServeHTTP(resp, req) + require.Equal(t, "preferred response", resp.Body.String()) + require.Equal(t, http.StatusOK, resp.Code) + + // The HTTP request above will return as soon as the primary response is received, but this doesn't guarantee that the response comparison has been completed. + // Wait for the response comparison to complete before checking the logged messages. + waitForResponseComparisonMetric(t, reg, ComparisonSkipped, 1) + requireLogMessageWithError(t, logger.messages, "response comparison skipped", "skipped comparison of response because the incoming request to query-tee was cancelled: something cancelled this request") +} func Test_ProxyEndpoint_LogSlowQueries(t *testing.T) { testRoute := Route{RouteName: "test"} diff --git a/vendor/github.com/ebitengine/purego/.gitignore b/vendor/github.com/ebitengine/purego/.gitignore new file mode 100644 index 00000000000..b25c15b81fa --- /dev/null +++ b/vendor/github.com/ebitengine/purego/.gitignore @@ -0,0 +1 @@ +*~ diff --git a/vendor/github.com/ebitengine/purego/LICENSE b/vendor/github.com/ebitengine/purego/LICENSE new file mode 100644 index 00000000000..8dada3edaf5 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/ebitengine/purego/README.md b/vendor/github.com/ebitengine/purego/README.md new file mode 100644 index 00000000000..f1ff9053ace --- /dev/null +++ b/vendor/github.com/ebitengine/purego/README.md @@ -0,0 +1,97 @@ +# purego +[![Go Reference](https://pkg.go.dev/badge/github.com/ebitengine/purego?GOOS=darwin.svg)](https://pkg.go.dev/github.com/ebitengine/purego?GOOS=darwin) + +A library for calling C functions from Go without Cgo. + +> This is beta software so expect bugs and potentially API breaking changes +> but each release will be tagged to avoid breaking people's code. +> Bug reports are encouraged. + +## Motivation + +The [Ebitengine](https://github.com/hajimehoshi/ebiten) game engine was ported to use only Go on Windows. This enabled +cross-compiling to Windows from any other operating system simply by setting `GOOS=windows`. The purego project was +born to bring that same vision to the other platforms supported by Ebitengine. + +## Benefits + +- **Simple Cross-Compilation**: No C means you can build for other platforms easily without a C compiler. +- **Faster Compilation**: Efficiently cache your entirely Go builds. +- **Smaller Binaries**: Using Cgo generates a C wrapper function for each C function called. Purego doesn't! +- **Dynamic Linking**: Load symbols at runtime and use it as a plugin system. +- **Foreign Function Interface**: Call into other languages that are compiled into shared objects. +- **Cgo Fallback**: Works even with CGO_ENABLED=1 so incremental porting is possible. +This also means unsupported GOARCHs (freebsd/riscv64, linux/mips, etc.) will still work +except for float arguments and return values. + +## Supported Platforms + +- **FreeBSD**: amd64, arm64 +- **Linux**: amd64, arm64 +- **macOS / iOS**: amd64, arm64 +- **Windows**: 386*, amd64, arm*, arm64 + +`*` These architectures only support SyscallN and NewCallback + +## Example + +The example below only showcases purego use for macOS and Linux. The other platforms require special handling which can +be seen in the complete example at [examples/libc](https://github.com/ebitengine/purego/tree/main/examples/libc) which supports Windows and FreeBSD. + +```go +package main + +import ( + "fmt" + "runtime" + + "github.com/ebitengine/purego" +) + +func getSystemLibrary() string { + switch runtime.GOOS { + case "darwin": + return "/usr/lib/libSystem.B.dylib" + case "linux": + return "libc.so.6" + default: + panic(fmt.Errorf("GOOS=%s is not supported", runtime.GOOS)) + } +} + +func main() { + libc, err := purego.Dlopen(getSystemLibrary(), purego.RTLD_NOW|purego.RTLD_GLOBAL) + if err != nil { + panic(err) + } + var puts func(string) + purego.RegisterLibFunc(&puts, libc, "puts") + puts("Calling C from Go without Cgo!") +} +``` + +Then to run: `CGO_ENABLED=0 go run main.go` + +## Questions + +If you have questions about how to incorporate purego in your project or want to discuss +how it works join the [Discord](https://discord.gg/HzGZVD6BkY)! + +### External Code + +Purego uses code that originates from the Go runtime. These files are under the BSD-3 +License that can be found [in the Go Source](https://github.com/golang/go/blob/master/LICENSE). +This is a list of the copied files: + +* `abi_*.h` from package `runtime/cgo` +* `zcallback_darwin_*.s` from package `runtime` +* `internal/fakecgo/abi_*.h` from package `runtime/cgo` +* `internal/fakecgo/asm_GOARCH.s` from package `runtime/cgo` +* `internal/fakecgo/callbacks.go` from package `runtime/cgo` +* `internal/fakecgo/go_GOOS_GOARCH.go` from package `runtime/cgo` +* `internal/fakecgo/iscgo.go` from package `runtime/cgo` +* `internal/fakecgo/setenv.go` from package `runtime/cgo` +* `internal/fakecgo/freebsd.go` from package `runtime/cgo` + +The files `abi_*.h` and `internal/fakecgo/abi_*.h` are the same because Bazel does not support cross-package use of +`#include` so we need each one once per package. (cf. [issue](https://github.com/bazelbuild/rules_go/issues/3636)) diff --git a/vendor/github.com/ebitengine/purego/abi_amd64.h b/vendor/github.com/ebitengine/purego/abi_amd64.h new file mode 100644 index 00000000000..9949435fe9e --- /dev/null +++ b/vendor/github.com/ebitengine/purego/abi_amd64.h @@ -0,0 +1,99 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Macros for transitioning from the host ABI to Go ABI0. +// +// These save the frame pointer, so in general, functions that use +// these should have zero frame size to suppress the automatic frame +// pointer, though it's harmless to not do this. + +#ifdef GOOS_windows + +// REGS_HOST_TO_ABI0_STACK is the stack bytes used by +// PUSH_REGS_HOST_TO_ABI0. +#define REGS_HOST_TO_ABI0_STACK (28*8 + 8) + +// PUSH_REGS_HOST_TO_ABI0 prepares for transitioning from +// the host ABI to Go ABI0 code. It saves all registers that are +// callee-save in the host ABI and caller-save in Go ABI0 and prepares +// for entry to Go. +// +// Save DI SI BP BX R12 R13 R14 R15 X6-X15 registers and the DF flag. +// Clear the DF flag for the Go ABI. +// MXCSR matches the Go ABI, so we don't have to set that, +// and Go doesn't modify it, so we don't have to save it. +#define PUSH_REGS_HOST_TO_ABI0() \ + PUSHFQ \ + CLD \ + ADJSP $(REGS_HOST_TO_ABI0_STACK - 8) \ + MOVQ DI, (0*0)(SP) \ + MOVQ SI, (1*8)(SP) \ + MOVQ BP, (2*8)(SP) \ + MOVQ BX, (3*8)(SP) \ + MOVQ R12, (4*8)(SP) \ + MOVQ R13, (5*8)(SP) \ + MOVQ R14, (6*8)(SP) \ + MOVQ R15, (7*8)(SP) \ + MOVUPS X6, (8*8)(SP) \ + MOVUPS X7, (10*8)(SP) \ + MOVUPS X8, (12*8)(SP) \ + MOVUPS X9, (14*8)(SP) \ + MOVUPS X10, (16*8)(SP) \ + MOVUPS X11, (18*8)(SP) \ + MOVUPS X12, (20*8)(SP) \ + MOVUPS X13, (22*8)(SP) \ + MOVUPS X14, (24*8)(SP) \ + MOVUPS X15, (26*8)(SP) + +#define POP_REGS_HOST_TO_ABI0() \ + MOVQ (0*0)(SP), DI \ + MOVQ (1*8)(SP), SI \ + MOVQ (2*8)(SP), BP \ + MOVQ (3*8)(SP), BX \ + MOVQ (4*8)(SP), R12 \ + MOVQ (5*8)(SP), R13 \ + MOVQ (6*8)(SP), R14 \ + MOVQ (7*8)(SP), R15 \ + MOVUPS (8*8)(SP), X6 \ + MOVUPS (10*8)(SP), X7 \ + MOVUPS (12*8)(SP), X8 \ + MOVUPS (14*8)(SP), X9 \ + MOVUPS (16*8)(SP), X10 \ + MOVUPS (18*8)(SP), X11 \ + MOVUPS (20*8)(SP), X12 \ + MOVUPS (22*8)(SP), X13 \ + MOVUPS (24*8)(SP), X14 \ + MOVUPS (26*8)(SP), X15 \ + ADJSP $-(REGS_HOST_TO_ABI0_STACK - 8) \ + POPFQ + +#else +// SysV ABI + +#define REGS_HOST_TO_ABI0_STACK (6*8) + +// SysV MXCSR matches the Go ABI, so we don't have to set that, +// and Go doesn't modify it, so we don't have to save it. +// Both SysV and Go require DF to be cleared, so that's already clear. +// The SysV and Go frame pointer conventions are compatible. +#define PUSH_REGS_HOST_TO_ABI0() \ + ADJSP $(REGS_HOST_TO_ABI0_STACK) \ + MOVQ BP, (5*8)(SP) \ + LEAQ (5*8)(SP), BP \ + MOVQ BX, (0*8)(SP) \ + MOVQ R12, (1*8)(SP) \ + MOVQ R13, (2*8)(SP) \ + MOVQ R14, (3*8)(SP) \ + MOVQ R15, (4*8)(SP) + +#define POP_REGS_HOST_TO_ABI0() \ + MOVQ (0*8)(SP), BX \ + MOVQ (1*8)(SP), R12 \ + MOVQ (2*8)(SP), R13 \ + MOVQ (3*8)(SP), R14 \ + MOVQ (4*8)(SP), R15 \ + MOVQ (5*8)(SP), BP \ + ADJSP $-(REGS_HOST_TO_ABI0_STACK) + +#endif diff --git a/vendor/github.com/ebitengine/purego/abi_arm64.h b/vendor/github.com/ebitengine/purego/abi_arm64.h new file mode 100644 index 00000000000..5d5061ec1db --- /dev/null +++ b/vendor/github.com/ebitengine/purego/abi_arm64.h @@ -0,0 +1,39 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Macros for transitioning from the host ABI to Go ABI0. +// +// These macros save and restore the callee-saved registers +// from the stack, but they don't adjust stack pointer, so +// the user should prepare stack space in advance. +// SAVE_R19_TO_R28(offset) saves R19 ~ R28 to the stack space +// of ((offset)+0*8)(RSP) ~ ((offset)+9*8)(RSP). +// +// SAVE_F8_TO_F15(offset) saves F8 ~ F15 to the stack space +// of ((offset)+0*8)(RSP) ~ ((offset)+7*8)(RSP). +// +// R29 is not saved because Go will save and restore it. + +#define SAVE_R19_TO_R28(offset) \ + STP (R19, R20), ((offset)+0*8)(RSP) \ + STP (R21, R22), ((offset)+2*8)(RSP) \ + STP (R23, R24), ((offset)+4*8)(RSP) \ + STP (R25, R26), ((offset)+6*8)(RSP) \ + STP (R27, g), ((offset)+8*8)(RSP) +#define RESTORE_R19_TO_R28(offset) \ + LDP ((offset)+0*8)(RSP), (R19, R20) \ + LDP ((offset)+2*8)(RSP), (R21, R22) \ + LDP ((offset)+4*8)(RSP), (R23, R24) \ + LDP ((offset)+6*8)(RSP), (R25, R26) \ + LDP ((offset)+8*8)(RSP), (R27, g) /* R28 */ +#define SAVE_F8_TO_F15(offset) \ + FSTPD (F8, F9), ((offset)+0*8)(RSP) \ + FSTPD (F10, F11), ((offset)+2*8)(RSP) \ + FSTPD (F12, F13), ((offset)+4*8)(RSP) \ + FSTPD (F14, F15), ((offset)+6*8)(RSP) +#define RESTORE_F8_TO_F15(offset) \ + FLDPD ((offset)+0*8)(RSP), (F8, F9) \ + FLDPD ((offset)+2*8)(RSP), (F10, F11) \ + FLDPD ((offset)+4*8)(RSP), (F12, F13) \ + FLDPD ((offset)+6*8)(RSP), (F14, F15) diff --git a/vendor/github.com/ebitengine/purego/cgo.go b/vendor/github.com/ebitengine/purego/cgo.go new file mode 100644 index 00000000000..7d5abef3499 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/cgo.go @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build cgo && (darwin || freebsd || linux) + +package purego + +// if CGO_ENABLED=1 import the Cgo runtime to ensure that it is set up properly. +// This is required since some frameworks need TLS setup the C way which Go doesn't do. +// We currently don't support ios in fakecgo mode so force Cgo or fail +// Even if CGO_ENABLED=1 the Cgo runtime is not imported unless `import "C"` is used. +// which will import this package automatically. Normally this isn't an issue since it +// usually isn't possible to call into C without using that import. However, with purego +// it is since we don't use `import "C"`! +import ( + _ "runtime/cgo" + + _ "github.com/ebitengine/purego/internal/cgo" +) diff --git a/vendor/github.com/ebitengine/purego/dlerror.go b/vendor/github.com/ebitengine/purego/dlerror.go new file mode 100644 index 00000000000..95cdfe16f24 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/dlerror.go @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2023 The Ebitengine Authors + +//go:build darwin || freebsd || linux + +package purego + +// Dlerror represents an error value returned from Dlopen, Dlsym, or Dlclose. +// +// This type is not available on Windows as there is no counterpart to it on Windows. +type Dlerror struct { + s string +} + +func (e Dlerror) Error() string { + return e.s +} diff --git a/vendor/github.com/ebitengine/purego/dlfcn.go b/vendor/github.com/ebitengine/purego/dlfcn.go new file mode 100644 index 00000000000..f70a24584d6 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/dlfcn.go @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build (darwin || freebsd || linux) && !android && !faketime + +package purego + +import ( + "unsafe" +) + +// Unix Specification for dlfcn.h: https://pubs.opengroup.org/onlinepubs/7908799/xsh/dlfcn.h.html + +var ( + fnDlopen func(path string, mode int) uintptr + fnDlsym func(handle uintptr, name string) uintptr + fnDlerror func() string + fnDlclose func(handle uintptr) bool +) + +func init() { + RegisterFunc(&fnDlopen, dlopenABI0) + RegisterFunc(&fnDlsym, dlsymABI0) + RegisterFunc(&fnDlerror, dlerrorABI0) + RegisterFunc(&fnDlclose, dlcloseABI0) +} + +// Dlopen examines the dynamic library or bundle file specified by path. If the file is compatible +// with the current process and has not already been loaded into the +// current process, it is loaded and linked. After being linked, if it contains +// any initializer functions, they are called, before Dlopen +// returns. It returns a handle that can be used with Dlsym and Dlclose. +// A second call to Dlopen with the same path will return the same handle, but the internal +// reference count for the handle will be incremented. Therefore, all +// Dlopen calls should be balanced with a Dlclose call. +// +// This function is not available on Windows. +// Use [golang.org/x/sys/windows.LoadLibrary], [golang.org/x/sys/windows.LoadLibraryEx], +// [golang.org/x/sys/windows.NewLazyDLL], or [golang.org/x/sys/windows.NewLazySystemDLL] for Windows instead. +func Dlopen(path string, mode int) (uintptr, error) { + u := fnDlopen(path, mode) + if u == 0 { + return 0, Dlerror{fnDlerror()} + } + return u, nil +} + +// Dlsym takes a "handle" of a dynamic library returned by Dlopen and the symbol name. +// It returns the address where that symbol is loaded into memory. If the symbol is not found, +// in the specified library or any of the libraries that were automatically loaded by Dlopen +// when that library was loaded, Dlsym returns zero. +// +// This function is not available on Windows. +// Use [golang.org/x/sys/windows.GetProcAddress] for Windows instead. +func Dlsym(handle uintptr, name string) (uintptr, error) { + u := fnDlsym(handle, name) + if u == 0 { + return 0, Dlerror{fnDlerror()} + } + return u, nil +} + +// Dlclose decrements the reference count on the dynamic library handle. +// If the reference count drops to zero and no other loaded libraries +// use symbols in it, then the dynamic library is unloaded. +// +// This function is not available on Windows. +// Use [golang.org/x/sys/windows.FreeLibrary] for Windows instead. +func Dlclose(handle uintptr) error { + if fnDlclose(handle) { + return Dlerror{fnDlerror()} + } + return nil +} + +func loadSymbol(handle uintptr, name string) (uintptr, error) { + return Dlsym(handle, name) +} + +// these functions exist in dlfcn_stubs.s and are calling C functions linked to in dlfcn_GOOS.go +// the indirection is necessary because a function is actually a pointer to the pointer to the code. +// sadly, I do not know of anyway to remove the assembly stubs entirely because //go:linkname doesn't +// appear to work if you link directly to the C function on darwin arm64. + +//go:linkname dlopen dlopen +var dlopen uintptr +var dlopenABI0 = uintptr(unsafe.Pointer(&dlopen)) + +//go:linkname dlsym dlsym +var dlsym uintptr +var dlsymABI0 = uintptr(unsafe.Pointer(&dlsym)) + +//go:linkname dlclose dlclose +var dlclose uintptr +var dlcloseABI0 = uintptr(unsafe.Pointer(&dlclose)) + +//go:linkname dlerror dlerror +var dlerror uintptr +var dlerrorABI0 = uintptr(unsafe.Pointer(&dlerror)) diff --git a/vendor/github.com/ebitengine/purego/dlfcn_android.go b/vendor/github.com/ebitengine/purego/dlfcn_android.go new file mode 100644 index 00000000000..0d5341764ed --- /dev/null +++ b/vendor/github.com/ebitengine/purego/dlfcn_android.go @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 The Ebitengine Authors + +package purego + +import "github.com/ebitengine/purego/internal/cgo" + +// Source for constants: https://android.googlesource.com/platform/bionic/+/refs/heads/main/libc/include/dlfcn.h + +const ( + is64bit = 1 << (^uintptr(0) >> 63) / 2 + is32bit = 1 - is64bit + RTLD_DEFAULT = is32bit * 0xffffffff + RTLD_LAZY = 0x00000001 + RTLD_NOW = is64bit * 0x00000002 + RTLD_LOCAL = 0x00000000 + RTLD_GLOBAL = is64bit*0x00100 | is32bit*0x00000002 +) + +func Dlopen(path string, mode int) (uintptr, error) { + return cgo.Dlopen(path, mode) +} + +func Dlsym(handle uintptr, name string) (uintptr, error) { + return cgo.Dlsym(handle, name) +} + +func Dlclose(handle uintptr) error { + return cgo.Dlclose(handle) +} + +func loadSymbol(handle uintptr, name string) (uintptr, error) { + return Dlsym(handle, name) +} diff --git a/vendor/github.com/ebitengine/purego/dlfcn_darwin.go b/vendor/github.com/ebitengine/purego/dlfcn_darwin.go new file mode 100644 index 00000000000..5f876278a3e --- /dev/null +++ b/vendor/github.com/ebitengine/purego/dlfcn_darwin.go @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +package purego + +// Source for constants: https://opensource.apple.com/source/dyld/dyld-360.14/include/dlfcn.h.auto.html + +const ( + RTLD_DEFAULT = 1<<64 - 2 // Pseudo-handle for dlsym so search for any loaded symbol + RTLD_LAZY = 0x1 // Relocations are performed at an implementation-dependent time. + RTLD_NOW = 0x2 // Relocations are performed when the object is loaded. + RTLD_LOCAL = 0x4 // All symbols are not made available for relocation processing by other modules. + RTLD_GLOBAL = 0x8 // All symbols are available for relocation processing of other modules. +) + +//go:cgo_import_dynamic purego_dlopen dlopen "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_dlsym dlsym "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_dlerror dlerror "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_dlclose dlclose "/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic purego_dlopen dlopen "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_dlsym dlsym "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_dlerror dlerror "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_dlclose dlclose "/usr/lib/libSystem.B.dylib" diff --git a/vendor/github.com/ebitengine/purego/dlfcn_freebsd.go b/vendor/github.com/ebitengine/purego/dlfcn_freebsd.go new file mode 100644 index 00000000000..6b371620d96 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/dlfcn_freebsd.go @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +package purego + +// Constants as defined in https://github.com/freebsd/freebsd-src/blob/main/include/dlfcn.h +const ( + intSize = 32 << (^uint(0) >> 63) // 32 or 64 + RTLD_DEFAULT = 1< C) +// +// string <=> char* +// bool <=> _Bool +// uintptr <=> uintptr_t +// uint <=> uint32_t or uint64_t +// uint8 <=> uint8_t +// uint16 <=> uint16_t +// uint32 <=> uint32_t +// uint64 <=> uint64_t +// int <=> int32_t or int64_t +// int8 <=> int8_t +// int16 <=> int16_t +// int32 <=> int32_t +// int64 <=> int64_t +// float32 <=> float +// float64 <=> double +// struct <=> struct (WIP - darwin only) +// func <=> C function +// unsafe.Pointer, *T <=> void* +// []T => void* +// +// There is a special case when the last argument of fptr is a variadic interface (or []interface} +// it will be expanded into a call to the C function as if it had the arguments in that slice. +// This means that using arg ...interface{} is like a cast to the function with the arguments inside arg. +// This is not the same as C variadic. +// +// # Memory +// +// In general it is not possible for purego to guarantee the lifetimes of objects returned or received from +// calling functions using RegisterFunc. For arguments to a C function it is important that the C function doesn't +// hold onto a reference to Go memory. This is the same as the [Cgo rules]. +// +// However, there are some special cases. When passing a string as an argument if the string does not end in a null +// terminated byte (\x00) then the string will be copied into memory maintained by purego. The memory is only valid for +// that specific call. Therefore, if the C code keeps a reference to that string it may become invalid at some +// undefined time. However, if the string does already contain a null-terminated byte then no copy is done. +// It is then the responsibility of the caller to ensure the string stays alive as long as it's needed in C memory. +// This can be done using runtime.KeepAlive or allocating the string in C memory using malloc. When a C function +// returns a null-terminated pointer to char a Go string can be used. Purego will allocate a new string in Go memory +// and copy the data over. This string will be garbage collected whenever Go decides it's no longer referenced. +// This C created string will not be freed by purego. If the pointer to char is not null-terminated or must continue +// to point to C memory (because it's a buffer for example) then use a pointer to byte and then convert that to a slice +// using unsafe.Slice. Doing this means that it becomes the responsibility of the caller to care about the lifetime +// of the pointer +// +// # Structs +// +// Purego can handle the most common structs that have fields of builtin types like int8, uint16, float32, etc. However, +// it does not support aligning fields properly. It is therefore the responsibility of the caller to ensure +// that all padding is added to the Go struct to match the C one. See `BoolStructFn` in struct_test.go for an example. +// +// # Example +// +// All functions below call this C function: +// +// char *foo(char *str); +// +// // Let purego convert types +// var foo func(s string) string +// goString := foo("copied") +// // Go will garbage collect this string +// +// // Manually, handle allocations +// var foo2 func(b string) *byte +// mustFree := foo2("not copied\x00") +// defer free(mustFree) +// +// [Cgo rules]: https://pkg.go.dev/cmd/cgo#hdr-Go_references_to_C +func RegisterFunc(fptr interface{}, cfn uintptr) { + fn := reflect.ValueOf(fptr).Elem() + ty := fn.Type() + if ty.Kind() != reflect.Func { + panic("purego: fptr must be a function pointer") + } + if ty.NumOut() > 1 { + panic("purego: function can only return zero or one values") + } + if cfn == 0 { + panic("purego: cfn is nil") + } + if ty.NumOut() == 1 && (ty.Out(0).Kind() == reflect.Float32 || ty.Out(0).Kind() == reflect.Float64) && + runtime.GOARCH != "arm64" && runtime.GOARCH != "amd64" { + panic("purego: float returns are not supported") + } + { + // this code checks how many registers and stack this function will use + // to avoid crashing with too many arguments + var ints int + var floats int + var stack int + for i := 0; i < ty.NumIn(); i++ { + arg := ty.In(i) + switch arg.Kind() { + case reflect.Func: + // This only does preliminary testing to ensure the CDecl argument + // is the first argument. Full testing is done when the callback is actually + // created in NewCallback. + for j := 0; j < arg.NumIn(); j++ { + in := arg.In(j) + if !in.AssignableTo(reflect.TypeOf(CDecl{})) { + continue + } + if j != 0 { + panic("purego: CDecl must be the first argument") + } + } + case reflect.String, reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Ptr, reflect.UnsafePointer, + reflect.Slice, reflect.Bool: + if ints < numOfIntegerRegisters() { + ints++ + } else { + stack++ + } + case reflect.Float32, reflect.Float64: + const is32bit = unsafe.Sizeof(uintptr(0)) == 4 + if is32bit { + panic("purego: floats only supported on 64bit platforms") + } + if floats < numOfFloats { + floats++ + } else { + stack++ + } + case reflect.Struct: + if runtime.GOOS != "darwin" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64") { + panic("purego: struct arguments are only supported on darwin amd64 & arm64") + } + if arg.Size() == 0 { + continue + } + addInt := func(u uintptr) { + ints++ + } + addFloat := func(u uintptr) { + floats++ + } + addStack := func(u uintptr) { + stack++ + } + _ = addStruct(reflect.New(arg).Elem(), &ints, &floats, &stack, addInt, addFloat, addStack, nil) + default: + panic("purego: unsupported kind " + arg.Kind().String()) + } + } + if ty.NumOut() == 1 && ty.Out(0).Kind() == reflect.Struct { + if runtime.GOOS != "darwin" { + panic("purego: struct return values only supported on darwin arm64 & amd64") + } + outType := ty.Out(0) + checkStructFieldsSupported(outType) + if runtime.GOARCH == "amd64" && outType.Size() > maxRegAllocStructSize { + // on amd64 if struct is bigger than 16 bytes allocate the return struct + // and pass it in as a hidden first argument. + ints++ + } + } + sizeOfStack := maxArgs - numOfIntegerRegisters() + if stack > sizeOfStack { + panic("purego: too many arguments") + } + } + v := reflect.MakeFunc(ty, func(args []reflect.Value) (results []reflect.Value) { + if len(args) > 0 { + if variadic, ok := args[len(args)-1].Interface().([]interface{}); ok { + // subtract one from args bc the last argument in args is []interface{} + // which we are currently expanding + tmp := make([]reflect.Value, len(args)-1+len(variadic)) + n := copy(tmp, args[:len(args)-1]) + for i, v := range variadic { + tmp[n+i] = reflect.ValueOf(v) + } + args = tmp + } + } + var sysargs [maxArgs]uintptr + stack := sysargs[numOfIntegerRegisters():] + var floats [numOfFloats]uintptr + var numInts int + var numFloats int + var numStack int + var addStack, addInt, addFloat func(x uintptr) + if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { + // Windows arm64 uses the same calling convention as macOS and Linux + addStack = func(x uintptr) { + stack[numStack] = x + numStack++ + } + addInt = func(x uintptr) { + if numInts >= numOfIntegerRegisters() { + addStack(x) + } else { + sysargs[numInts] = x + numInts++ + } + } + addFloat = func(x uintptr) { + if numFloats < len(floats) { + floats[numFloats] = x + numFloats++ + } else { + addStack(x) + } + } + } else { + // On Windows amd64 the arguments are passed in the numbered registered. + // So the first int is in the first integer register and the first float + // is in the second floating register if there is already a first int. + // This is in contrast to how macOS and Linux pass arguments which + // tries to use as many registers as possible in the calling convention. + addStack = func(x uintptr) { + sysargs[numStack] = x + numStack++ + } + addInt = addStack + addFloat = addStack + } + + var keepAlive []interface{} + defer func() { + runtime.KeepAlive(keepAlive) + runtime.KeepAlive(args) + }() + var syscall syscall15Args + if ty.NumOut() == 1 && ty.Out(0).Kind() == reflect.Struct { + outType := ty.Out(0) + if runtime.GOARCH == "amd64" && outType.Size() > maxRegAllocStructSize { + val := reflect.New(outType) + keepAlive = append(keepAlive, val) + addInt(val.Pointer()) + } else if runtime.GOARCH == "arm64" && outType.Size() > maxRegAllocStructSize { + isAllFloats, numFields := isAllSameFloat(outType) + if !isAllFloats || numFields > 4 { + val := reflect.New(outType) + keepAlive = append(keepAlive, val) + syscall.arm64_r8 = val.Pointer() + } + } + } + for _, v := range args { + switch v.Kind() { + case reflect.String: + ptr := strings.CString(v.String()) + keepAlive = append(keepAlive, ptr) + addInt(uintptr(unsafe.Pointer(ptr))) + case reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + addInt(uintptr(v.Uint())) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + addInt(uintptr(v.Int())) + case reflect.Ptr, reflect.UnsafePointer, reflect.Slice: + // There is no need to keepAlive this pointer separately because it is kept alive in the args variable + addInt(v.Pointer()) + case reflect.Func: + addInt(NewCallback(v.Interface())) + case reflect.Bool: + if v.Bool() { + addInt(1) + } else { + addInt(0) + } + case reflect.Float32: + addFloat(uintptr(math.Float32bits(float32(v.Float())))) + case reflect.Float64: + addFloat(uintptr(math.Float64bits(v.Float()))) + case reflect.Struct: + keepAlive = addStruct(v, &numInts, &numFloats, &numStack, addInt, addFloat, addStack, keepAlive) + default: + panic("purego: unsupported kind: " + v.Kind().String()) + } + } + if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { + // Use the normal arm64 calling convention even on Windows + syscall = syscall15Args{ + cfn, + sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], sysargs[5], + sysargs[6], sysargs[7], sysargs[8], sysargs[9], sysargs[10], sysargs[11], + sysargs[12], sysargs[13], sysargs[14], + floats[0], floats[1], floats[2], floats[3], floats[4], floats[5], floats[6], floats[7], + syscall.arm64_r8, + } + runtime_cgocall(syscall15XABI0, unsafe.Pointer(&syscall)) + } else { + // This is a fallback for Windows amd64, 386, and arm. Note this may not support floats + syscall.a1, syscall.a2, _ = syscall_syscall15X(cfn, sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], + sysargs[5], sysargs[6], sysargs[7], sysargs[8], sysargs[9], sysargs[10], sysargs[11], + sysargs[12], sysargs[13], sysargs[14]) + syscall.f1 = syscall.a2 // on amd64 a2 stores the float return. On 32bit platforms floats aren't support + } + if ty.NumOut() == 0 { + return nil + } + outType := ty.Out(0) + v := reflect.New(outType).Elem() + switch outType.Kind() { + case reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + v.SetUint(uint64(syscall.a1)) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + v.SetInt(int64(syscall.a1)) + case reflect.Bool: + v.SetBool(byte(syscall.a1) != 0) + case reflect.UnsafePointer: + // We take the address and then dereference it to trick go vet from creating a possible miss-use of unsafe.Pointer + v.SetPointer(*(*unsafe.Pointer)(unsafe.Pointer(&syscall.a1))) + case reflect.Ptr: + v = reflect.NewAt(outType, unsafe.Pointer(&syscall.a1)).Elem() + case reflect.Func: + // wrap this C function in a nicely typed Go function + v = reflect.New(outType) + RegisterFunc(v.Interface(), syscall.a1) + case reflect.String: + v.SetString(strings.GoString(syscall.a1)) + case reflect.Float32: + // NOTE: syscall.r2 is only the floating return value on 64bit platforms. + // On 32bit platforms syscall.r2 is the upper part of a 64bit return. + v.SetFloat(float64(math.Float32frombits(uint32(syscall.f1)))) + case reflect.Float64: + // NOTE: syscall.r2 is only the floating return value on 64bit platforms. + // On 32bit platforms syscall.r2 is the upper part of a 64bit return. + v.SetFloat(math.Float64frombits(uint64(syscall.f1))) + case reflect.Struct: + v = getStruct(outType, syscall) + default: + panic("purego: unsupported return kind: " + outType.Kind().String()) + } + return []reflect.Value{v} + }) + fn.Set(v) +} + +// maxRegAllocStructSize is the biggest a struct can be while still fitting in registers. +// if it is bigger than this than enough space must be allocated on the heap and then passed into +// the function as the first parameter on amd64 or in R8 on arm64. +// +// If you change this make sure to update it in objc_runtime_darwin.go +const maxRegAllocStructSize = 16 + +func isAllSameFloat(ty reflect.Type) (allFloats bool, numFields int) { + allFloats = true + root := ty.Field(0).Type + for root.Kind() == reflect.Struct { + root = root.Field(0).Type + } + first := root.Kind() + if first != reflect.Float32 && first != reflect.Float64 { + allFloats = false + } + for i := 0; i < ty.NumField(); i++ { + f := ty.Field(i).Type + if f.Kind() == reflect.Struct { + var structNumFields int + allFloats, structNumFields = isAllSameFloat(f) + numFields += structNumFields + continue + } + numFields++ + if f.Kind() != first { + allFloats = false + } + } + return allFloats, numFields +} + +func checkStructFieldsSupported(ty reflect.Type) { + for i := 0; i < ty.NumField(); i++ { + f := ty.Field(i).Type + if f.Kind() == reflect.Array { + f = f.Elem() + } else if f.Kind() == reflect.Struct { + checkStructFieldsSupported(f) + continue + } + switch f.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Uintptr, reflect.Ptr, reflect.UnsafePointer, reflect.Float64, reflect.Float32: + default: + panic(fmt.Sprintf("purego: struct field type %s is not supported", f)) + } + } +} + +func roundUpTo8(val uintptr) uintptr { + return (val + 7) &^ 7 +} + +func numOfIntegerRegisters() int { + switch runtime.GOARCH { + case "arm64": + return 8 + case "amd64": + return 6 + default: + // since this platform isn't supported and can therefore only access + // integer registers it is fine to return the maxArgs + return maxArgs + } +} diff --git a/vendor/github.com/ebitengine/purego/go_runtime.go b/vendor/github.com/ebitengine/purego/go_runtime.go new file mode 100644 index 00000000000..13671ff23f2 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/go_runtime.go @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build darwin || freebsd || linux || windows + +package purego + +import ( + "unsafe" +) + +//go:linkname runtime_cgocall runtime.cgocall +func runtime_cgocall(fn uintptr, arg unsafe.Pointer) int32 // from runtime/sys_libc.go diff --git a/vendor/github.com/ebitengine/purego/internal/cgo/dlfcn_cgo_unix.go b/vendor/github.com/ebitengine/purego/internal/cgo/dlfcn_cgo_unix.go new file mode 100644 index 00000000000..b09ecac1cfe --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/cgo/dlfcn_cgo_unix.go @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 The Ebitengine Authors + +//go:build freebsd || linux + +package cgo + +/* + #cgo LDFLAGS: -ldl + +#include +#include +*/ +import "C" + +import ( + "errors" + "unsafe" +) + +func Dlopen(filename string, flag int) (uintptr, error) { + cfilename := C.CString(filename) + defer C.free(unsafe.Pointer(cfilename)) + handle := C.dlopen(cfilename, C.int(flag)) + if handle == nil { + return 0, errors.New(C.GoString(C.dlerror())) + } + return uintptr(handle), nil +} + +func Dlsym(handle uintptr, symbol string) (uintptr, error) { + csymbol := C.CString(symbol) + defer C.free(unsafe.Pointer(csymbol)) + symbolAddr := C.dlsym(*(*unsafe.Pointer)(unsafe.Pointer(&handle)), csymbol) + if symbolAddr == nil { + return 0, errors.New(C.GoString(C.dlerror())) + } + return uintptr(symbolAddr), nil +} + +func Dlclose(handle uintptr) error { + result := C.dlclose(*(*unsafe.Pointer)(unsafe.Pointer(&handle))) + if result != 0 { + return errors.New(C.GoString(C.dlerror())) + } + return nil +} + +// all that is needed is to assign each dl function because then its +// symbol will then be made available to the linker and linked to inside dlfcn.go +var ( + _ = C.dlopen + _ = C.dlsym + _ = C.dlerror + _ = C.dlclose +) diff --git a/vendor/github.com/ebitengine/purego/internal/cgo/empty.go b/vendor/github.com/ebitengine/purego/internal/cgo/empty.go new file mode 100644 index 00000000000..1d7cffe2a7e --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/cgo/empty.go @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 The Ebitengine Authors + +package cgo + +// Empty so that importing this package doesn't cause issue for certain platforms. diff --git a/vendor/github.com/ebitengine/purego/internal/cgo/syscall_cgo_unix.go b/vendor/github.com/ebitengine/purego/internal/cgo/syscall_cgo_unix.go new file mode 100644 index 00000000000..37ff24d5c1d --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/cgo/syscall_cgo_unix.go @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build freebsd || (linux && !(arm64 || amd64)) + +package cgo + +// this file is placed inside internal/cgo and not package purego +// because Cgo and assembly files can't be in the same package. + +/* + #cgo LDFLAGS: -ldl + +#include +#include +#include +#include + +typedef struct syscall15Args { + uintptr_t fn; + uintptr_t a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15; + uintptr_t f1, f2, f3, f4, f5, f6, f7, f8; + uintptr_t err; +} syscall15Args; + +void syscall15(struct syscall15Args *args) { + assert((args->f1|args->f2|args->f3|args->f4|args->f5|args->f6|args->f7|args->f8) == 0); + uintptr_t (*func_name)(uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, + uintptr_t a7, uintptr_t a8, uintptr_t a9, uintptr_t a10, uintptr_t a11, uintptr_t a12, + uintptr_t a13, uintptr_t a14, uintptr_t a15); + *(void**)(&func_name) = (void*)(args->fn); + uintptr_t r1 = func_name(args->a1,args->a2,args->a3,args->a4,args->a5,args->a6,args->a7,args->a8,args->a9, + args->a10,args->a11,args->a12,args->a13,args->a14,args->a15); + args->a1 = r1; + args->err = errno; +} + +*/ +import "C" +import "unsafe" + +// assign purego.syscall15XABI0 to the C version of this function. +var Syscall15XABI0 = unsafe.Pointer(C.syscall15) + +//go:nosplit +func Syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { + args := C.syscall15Args{ + C.uintptr_t(fn), C.uintptr_t(a1), C.uintptr_t(a2), C.uintptr_t(a3), + C.uintptr_t(a4), C.uintptr_t(a5), C.uintptr_t(a6), + C.uintptr_t(a7), C.uintptr_t(a8), C.uintptr_t(a9), C.uintptr_t(a10), C.uintptr_t(a11), C.uintptr_t(a12), + C.uintptr_t(a13), C.uintptr_t(a14), C.uintptr_t(a15), 0, 0, 0, 0, 0, 0, 0, 0, 0, + } + C.syscall15(&args) + return uintptr(args.a1), 0, uintptr(args.err) +} diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/abi_amd64.h b/vendor/github.com/ebitengine/purego/internal/fakecgo/abi_amd64.h new file mode 100644 index 00000000000..9949435fe9e --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/abi_amd64.h @@ -0,0 +1,99 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Macros for transitioning from the host ABI to Go ABI0. +// +// These save the frame pointer, so in general, functions that use +// these should have zero frame size to suppress the automatic frame +// pointer, though it's harmless to not do this. + +#ifdef GOOS_windows + +// REGS_HOST_TO_ABI0_STACK is the stack bytes used by +// PUSH_REGS_HOST_TO_ABI0. +#define REGS_HOST_TO_ABI0_STACK (28*8 + 8) + +// PUSH_REGS_HOST_TO_ABI0 prepares for transitioning from +// the host ABI to Go ABI0 code. It saves all registers that are +// callee-save in the host ABI and caller-save in Go ABI0 and prepares +// for entry to Go. +// +// Save DI SI BP BX R12 R13 R14 R15 X6-X15 registers and the DF flag. +// Clear the DF flag for the Go ABI. +// MXCSR matches the Go ABI, so we don't have to set that, +// and Go doesn't modify it, so we don't have to save it. +#define PUSH_REGS_HOST_TO_ABI0() \ + PUSHFQ \ + CLD \ + ADJSP $(REGS_HOST_TO_ABI0_STACK - 8) \ + MOVQ DI, (0*0)(SP) \ + MOVQ SI, (1*8)(SP) \ + MOVQ BP, (2*8)(SP) \ + MOVQ BX, (3*8)(SP) \ + MOVQ R12, (4*8)(SP) \ + MOVQ R13, (5*8)(SP) \ + MOVQ R14, (6*8)(SP) \ + MOVQ R15, (7*8)(SP) \ + MOVUPS X6, (8*8)(SP) \ + MOVUPS X7, (10*8)(SP) \ + MOVUPS X8, (12*8)(SP) \ + MOVUPS X9, (14*8)(SP) \ + MOVUPS X10, (16*8)(SP) \ + MOVUPS X11, (18*8)(SP) \ + MOVUPS X12, (20*8)(SP) \ + MOVUPS X13, (22*8)(SP) \ + MOVUPS X14, (24*8)(SP) \ + MOVUPS X15, (26*8)(SP) + +#define POP_REGS_HOST_TO_ABI0() \ + MOVQ (0*0)(SP), DI \ + MOVQ (1*8)(SP), SI \ + MOVQ (2*8)(SP), BP \ + MOVQ (3*8)(SP), BX \ + MOVQ (4*8)(SP), R12 \ + MOVQ (5*8)(SP), R13 \ + MOVQ (6*8)(SP), R14 \ + MOVQ (7*8)(SP), R15 \ + MOVUPS (8*8)(SP), X6 \ + MOVUPS (10*8)(SP), X7 \ + MOVUPS (12*8)(SP), X8 \ + MOVUPS (14*8)(SP), X9 \ + MOVUPS (16*8)(SP), X10 \ + MOVUPS (18*8)(SP), X11 \ + MOVUPS (20*8)(SP), X12 \ + MOVUPS (22*8)(SP), X13 \ + MOVUPS (24*8)(SP), X14 \ + MOVUPS (26*8)(SP), X15 \ + ADJSP $-(REGS_HOST_TO_ABI0_STACK - 8) \ + POPFQ + +#else +// SysV ABI + +#define REGS_HOST_TO_ABI0_STACK (6*8) + +// SysV MXCSR matches the Go ABI, so we don't have to set that, +// and Go doesn't modify it, so we don't have to save it. +// Both SysV and Go require DF to be cleared, so that's already clear. +// The SysV and Go frame pointer conventions are compatible. +#define PUSH_REGS_HOST_TO_ABI0() \ + ADJSP $(REGS_HOST_TO_ABI0_STACK) \ + MOVQ BP, (5*8)(SP) \ + LEAQ (5*8)(SP), BP \ + MOVQ BX, (0*8)(SP) \ + MOVQ R12, (1*8)(SP) \ + MOVQ R13, (2*8)(SP) \ + MOVQ R14, (3*8)(SP) \ + MOVQ R15, (4*8)(SP) + +#define POP_REGS_HOST_TO_ABI0() \ + MOVQ (0*8)(SP), BX \ + MOVQ (1*8)(SP), R12 \ + MOVQ (2*8)(SP), R13 \ + MOVQ (3*8)(SP), R14 \ + MOVQ (4*8)(SP), R15 \ + MOVQ (5*8)(SP), BP \ + ADJSP $-(REGS_HOST_TO_ABI0_STACK) + +#endif diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/abi_arm64.h b/vendor/github.com/ebitengine/purego/internal/fakecgo/abi_arm64.h new file mode 100644 index 00000000000..5d5061ec1db --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/abi_arm64.h @@ -0,0 +1,39 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Macros for transitioning from the host ABI to Go ABI0. +// +// These macros save and restore the callee-saved registers +// from the stack, but they don't adjust stack pointer, so +// the user should prepare stack space in advance. +// SAVE_R19_TO_R28(offset) saves R19 ~ R28 to the stack space +// of ((offset)+0*8)(RSP) ~ ((offset)+9*8)(RSP). +// +// SAVE_F8_TO_F15(offset) saves F8 ~ F15 to the stack space +// of ((offset)+0*8)(RSP) ~ ((offset)+7*8)(RSP). +// +// R29 is not saved because Go will save and restore it. + +#define SAVE_R19_TO_R28(offset) \ + STP (R19, R20), ((offset)+0*8)(RSP) \ + STP (R21, R22), ((offset)+2*8)(RSP) \ + STP (R23, R24), ((offset)+4*8)(RSP) \ + STP (R25, R26), ((offset)+6*8)(RSP) \ + STP (R27, g), ((offset)+8*8)(RSP) +#define RESTORE_R19_TO_R28(offset) \ + LDP ((offset)+0*8)(RSP), (R19, R20) \ + LDP ((offset)+2*8)(RSP), (R21, R22) \ + LDP ((offset)+4*8)(RSP), (R23, R24) \ + LDP ((offset)+6*8)(RSP), (R25, R26) \ + LDP ((offset)+8*8)(RSP), (R27, g) /* R28 */ +#define SAVE_F8_TO_F15(offset) \ + FSTPD (F8, F9), ((offset)+0*8)(RSP) \ + FSTPD (F10, F11), ((offset)+2*8)(RSP) \ + FSTPD (F12, F13), ((offset)+4*8)(RSP) \ + FSTPD (F14, F15), ((offset)+6*8)(RSP) +#define RESTORE_F8_TO_F15(offset) \ + FLDPD ((offset)+0*8)(RSP), (F8, F9) \ + FLDPD ((offset)+2*8)(RSP), (F10, F11) \ + FLDPD ((offset)+4*8)(RSP), (F12, F13) \ + FLDPD ((offset)+6*8)(RSP), (F14, F15) diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/asm_amd64.s b/vendor/github.com/ebitengine/purego/internal/fakecgo/asm_amd64.s new file mode 100644 index 00000000000..2b7eb57f8ae --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/asm_amd64.s @@ -0,0 +1,39 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" +#include "abi_amd64.h" + +// Called by C code generated by cmd/cgo. +// func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr) +// Saves C callee-saved registers and calls cgocallback with three arguments. +// fn is the PC of a func(a unsafe.Pointer) function. +// This signature is known to SWIG, so we can't change it. +TEXT crosscall2(SB), NOSPLIT, $0-0 + PUSH_REGS_HOST_TO_ABI0() + + // Make room for arguments to cgocallback. + ADJSP $0x18 + +#ifndef GOOS_windows + MOVQ DI, 0x0(SP) // fn + MOVQ SI, 0x8(SP) // arg + + // Skip n in DX. + MOVQ CX, 0x10(SP) // ctxt + +#else + MOVQ CX, 0x0(SP) // fn + MOVQ DX, 0x8(SP) // arg + + // Skip n in R8. + MOVQ R9, 0x10(SP) // ctxt + +#endif + + CALL runtime·cgocallback(SB) + + ADJSP $-0x18 + POP_REGS_HOST_TO_ABI0() + RET diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/asm_arm64.s b/vendor/github.com/ebitengine/purego/internal/fakecgo/asm_arm64.s new file mode 100644 index 00000000000..50e5261d922 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/asm_arm64.s @@ -0,0 +1,36 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" +#include "abi_arm64.h" + +// Called by C code generated by cmd/cgo. +// func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr) +// Saves C callee-saved registers and calls cgocallback with three arguments. +// fn is the PC of a func(a unsafe.Pointer) function. +TEXT crosscall2(SB), NOSPLIT|NOFRAME, $0 +/* + * We still need to save all callee save register as before, and then + * push 3 args for fn (R0, R1, R3), skipping R2. + * Also note that at procedure entry in gc world, 8(RSP) will be the + * first arg. + */ + SUB $(8*24), RSP + STP (R0, R1), (8*1)(RSP) + MOVD R3, (8*3)(RSP) + + SAVE_R19_TO_R28(8*4) + SAVE_F8_TO_F15(8*14) + STP (R29, R30), (8*22)(RSP) + + // Initialize Go ABI environment + BL runtime·load_g(SB) + BL runtime·cgocallback(SB) + + RESTORE_R19_TO_R28(8*4) + RESTORE_F8_TO_F15(8*14) + LDP (8*22)(RSP), (R29, R30) + + ADD $(8*24), RSP + RET diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/callbacks.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/callbacks.go new file mode 100644 index 00000000000..f29e690cc15 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/callbacks.go @@ -0,0 +1,93 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !cgo && (darwin || freebsd || linux) + +package fakecgo + +import ( + _ "unsafe" +) + +// TODO: decide if we need _runtime_cgo_panic_internal + +//go:linkname x_cgo_init_trampoline x_cgo_init_trampoline +//go:linkname _cgo_init _cgo_init +var x_cgo_init_trampoline byte +var _cgo_init = &x_cgo_init_trampoline + +// Creates a new system thread without updating any Go state. +// +// This method is invoked during shared library loading to create a new OS +// thread to perform the runtime initialization. This method is similar to +// _cgo_sys_thread_start except that it doesn't update any Go state. + +//go:linkname x_cgo_thread_start_trampoline x_cgo_thread_start_trampoline +//go:linkname _cgo_thread_start _cgo_thread_start +var x_cgo_thread_start_trampoline byte +var _cgo_thread_start = &x_cgo_thread_start_trampoline + +// Notifies that the runtime has been initialized. +// +// We currently block at every CGO entry point (via _cgo_wait_runtime_init_done) +// to ensure that the runtime has been initialized before the CGO call is +// executed. This is necessary for shared libraries where we kickoff runtime +// initialization in a separate thread and return without waiting for this +// thread to complete the init. + +//go:linkname x_cgo_notify_runtime_init_done_trampoline x_cgo_notify_runtime_init_done_trampoline +//go:linkname _cgo_notify_runtime_init_done _cgo_notify_runtime_init_done +var x_cgo_notify_runtime_init_done_trampoline byte +var _cgo_notify_runtime_init_done = &x_cgo_notify_runtime_init_done_trampoline + +// Indicates whether a dummy thread key has been created or not. +// +// When calling go exported function from C, we register a destructor +// callback, for a dummy thread key, by using pthread_key_create. + +//go:linkname _cgo_pthread_key_created _cgo_pthread_key_created +var x_cgo_pthread_key_created uintptr +var _cgo_pthread_key_created = &x_cgo_pthread_key_created + +// Set the x_crosscall2_ptr C function pointer variable point to crosscall2. +// It's for the runtime package to call at init time. +func set_crosscall2() { + // nothing needs to be done here for fakecgo + // because it's possible to just call cgocallback directly +} + +//go:linkname _set_crosscall2 runtime.set_crosscall2 +var _set_crosscall2 = set_crosscall2 + +// Store the g into the thread-specific value. +// So that pthread_key_destructor will dropm when the thread is exiting. + +//go:linkname x_cgo_bindm_trampoline x_cgo_bindm_trampoline +//go:linkname _cgo_bindm _cgo_bindm +var x_cgo_bindm_trampoline byte +var _cgo_bindm = &x_cgo_bindm_trampoline + +// TODO: decide if we need x_cgo_set_context_function +// TODO: decide if we need _cgo_yield + +var ( + // In Go 1.20 the race detector was rewritten to pure Go + // on darwin. This means that when CGO_ENABLED=0 is set + // fakecgo is built with race detector code. This is not + // good since this code is pretending to be C. The go:norace + // pragma is not enough, since it only applies to the native + // ABIInternal function. The ABIO wrapper (which is necessary, + // since all references to text symbols from assembly will use it) + // does not inherit the go:norace pragma, so it will still be + // instrumented by the race detector. + // + // To circumvent this issue, using closure calls in the + // assembly, which forces the compiler to use the ABIInternal + // native implementation (which has go:norace) instead. + threadentry_call = threadentry + x_cgo_init_call = x_cgo_init + x_cgo_setenv_call = x_cgo_setenv + x_cgo_unsetenv_call = x_cgo_unsetenv + x_cgo_thread_start_call = x_cgo_thread_start +) diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/doc.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/doc.go new file mode 100644 index 00000000000..be82f7dfca9 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/doc.go @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo && (darwin || freebsd || linux) + +// Package fakecgo implements the Cgo runtime (runtime/cgo) entirely in Go. +// This allows code that calls into C to function properly when CGO_ENABLED=0. +// +// # Goals +// +// fakecgo attempts to replicate the same naming structure as in the runtime. +// For example, functions that have the prefix "gcc_*" are named "go_*". +// This makes it easier to port other GOOSs and GOARCHs as well as to keep +// it in sync with runtime/cgo. +// +// # Support +// +// Currently, fakecgo only supports macOS on amd64 & arm64. It also cannot +// be used with -buildmode=c-archive because that requires special initialization +// that fakecgo does not implement at the moment. +// +// # Usage +// +// Using fakecgo is easy just import _ "github.com/ebitengine/purego" and then +// set the environment variable CGO_ENABLED=0. +// The recommended usage for fakecgo is to prefer using runtime/cgo if possible +// but if cross-compiling or fast build times are important fakecgo is available. +// Purego will pick which ever Cgo runtime is available and prefer the one that +// comes with Go (runtime/cgo). +package fakecgo + +//go:generate go run gen.go diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/freebsd.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/freebsd.go new file mode 100644 index 00000000000..bb73a709e69 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/freebsd.go @@ -0,0 +1,27 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build freebsd && !cgo + +package fakecgo + +import _ "unsafe" // for go:linkname + +// Supply environ and __progname, because we don't +// link against the standard FreeBSD crt0.o and the +// libc dynamic library needs them. + +// Note: when building with cross-compiling or CGO_ENABLED=0, add +// the following argument to `go` so that these symbols are defined by +// making fakecgo the Cgo. +// -gcflags="github.com/ebitengine/purego/internal/fakecgo=-std" + +//go:linkname _environ environ +//go:linkname _progname __progname + +//go:cgo_export_dynamic environ +//go:cgo_export_dynamic __progname + +var _environ uintptr +var _progname uintptr diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/go_darwin_amd64.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_darwin_amd64.go new file mode 100644 index 00000000000..39f5ff1f06a --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_darwin_amd64.go @@ -0,0 +1,73 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !cgo + +package fakecgo + +import "unsafe" + +//go:nosplit +//go:norace +func _cgo_sys_thread_start(ts *ThreadStart) { + var attr pthread_attr_t + var ign, oset sigset_t + var p pthread_t + var size size_t + var err int + + sigfillset(&ign) + pthread_sigmask(SIG_SETMASK, &ign, &oset) + + size = pthread_get_stacksize_np(pthread_self()) + pthread_attr_init(&attr) + pthread_attr_setstacksize(&attr, size) + // Leave stacklo=0 and set stackhi=size; mstart will do the rest. + ts.g.stackhi = uintptr(size) + + err = _cgo_try_pthread_create(&p, &attr, unsafe.Pointer(threadentry_trampolineABI0), ts) + + pthread_sigmask(SIG_SETMASK, &oset, nil) + + if err != 0 { + print("fakecgo: pthread_create failed: ") + println(err) + abort() + } +} + +// threadentry_trampolineABI0 maps the C ABI to Go ABI then calls the Go function +// +//go:linkname x_threadentry_trampoline threadentry_trampoline +var x_threadentry_trampoline byte +var threadentry_trampolineABI0 = &x_threadentry_trampoline + +//go:nosplit +//go:norace +func threadentry(v unsafe.Pointer) unsafe.Pointer { + ts := *(*ThreadStart)(v) + free(v) + + setg_trampoline(setg_func, uintptr(unsafe.Pointer(ts.g))) + + // faking funcs in go is a bit a... involved - but the following works :) + fn := uintptr(unsafe.Pointer(&ts.fn)) + (*(*func())(unsafe.Pointer(&fn)))() + + return nil +} + +// here we will store a pointer to the provided setg func +var setg_func uintptr + +//go:nosplit +//go:norace +func x_cgo_init(g *G, setg uintptr) { + var size size_t + + setg_func = setg + + size = pthread_get_stacksize_np(pthread_self()) + g.stacklo = uintptr(unsafe.Add(unsafe.Pointer(&size), -size+4096)) +} diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/go_darwin_arm64.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_darwin_arm64.go new file mode 100644 index 00000000000..d0868f0f790 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_darwin_arm64.go @@ -0,0 +1,88 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !cgo + +package fakecgo + +import "unsafe" + +//go:nosplit +//go:norace +func _cgo_sys_thread_start(ts *ThreadStart) { + var attr pthread_attr_t + var ign, oset sigset_t + var p pthread_t + var size size_t + var err int + + sigfillset(&ign) + pthread_sigmask(SIG_SETMASK, &ign, &oset) + + size = pthread_get_stacksize_np(pthread_self()) + pthread_attr_init(&attr) + pthread_attr_setstacksize(&attr, size) + // Leave stacklo=0 and set stackhi=size; mstart will do the rest. + ts.g.stackhi = uintptr(size) + + err = _cgo_try_pthread_create(&p, &attr, unsafe.Pointer(threadentry_trampolineABI0), ts) + + pthread_sigmask(SIG_SETMASK, &oset, nil) + + if err != 0 { + print("fakecgo: pthread_create failed: ") + println(err) + abort() + } +} + +// threadentry_trampolineABI0 maps the C ABI to Go ABI then calls the Go function +// +//go:linkname x_threadentry_trampoline threadentry_trampoline +var x_threadentry_trampoline byte +var threadentry_trampolineABI0 = &x_threadentry_trampoline + +//go:nosplit +//go:norace +func threadentry(v unsafe.Pointer) unsafe.Pointer { + ts := *(*ThreadStart)(v) + free(v) + + // TODO: support ios + //#if TARGET_OS_IPHONE + // darwin_arm_init_thread_exception_port(); + //#endif + setg_trampoline(setg_func, uintptr(unsafe.Pointer(ts.g))) + + // faking funcs in go is a bit a... involved - but the following works :) + fn := uintptr(unsafe.Pointer(&ts.fn)) + (*(*func())(unsafe.Pointer(&fn)))() + + return nil +} + +// here we will store a pointer to the provided setg func +var setg_func uintptr + +// x_cgo_init(G *g, void (*setg)(void*)) (runtime/cgo/gcc_linux_amd64.c) +// This get's called during startup, adjusts stacklo, and provides a pointer to setg_gcc for us +// Additionally, if we set _cgo_init to non-null, go won't do it's own TLS setup +// This function can't be go:systemstack since go is not in a state where the systemcheck would work. +// +//go:nosplit +//go:norace +func x_cgo_init(g *G, setg uintptr) { + var size size_t + + setg_func = setg + size = pthread_get_stacksize_np(pthread_self()) + g.stacklo = uintptr(unsafe.Add(unsafe.Pointer(&size), -size+4096)) + + //TODO: support ios + //#if TARGET_OS_IPHONE + // darwin_arm_init_mach_exception_handler(); + // darwin_arm_init_thread_exception_port(); + // init_working_dir(); + //#endif +} diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/go_freebsd_amd64.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_freebsd_amd64.go new file mode 100644 index 00000000000..c9ff7156a89 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_freebsd_amd64.go @@ -0,0 +1,95 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !cgo + +package fakecgo + +import "unsafe" + +//go:nosplit +func _cgo_sys_thread_start(ts *ThreadStart) { + var attr pthread_attr_t + var ign, oset sigset_t + var p pthread_t + var size size_t + var err int + + //fprintf(stderr, "runtime/cgo: _cgo_sys_thread_start: fn=%p, g=%p\n", ts->fn, ts->g); // debug + sigfillset(&ign) + pthread_sigmask(SIG_SETMASK, &ign, &oset) + + pthread_attr_init(&attr) + pthread_attr_getstacksize(&attr, &size) + // Leave stacklo=0 and set stackhi=size; mstart will do the rest. + ts.g.stackhi = uintptr(size) + + err = _cgo_try_pthread_create(&p, &attr, unsafe.Pointer(threadentry_trampolineABI0), ts) + + pthread_sigmask(SIG_SETMASK, &oset, nil) + + if err != 0 { + print("fakecgo: pthread_create failed: ") + println(err) + abort() + } +} + +// threadentry_trampolineABI0 maps the C ABI to Go ABI then calls the Go function +// +//go:linkname x_threadentry_trampoline threadentry_trampoline +var x_threadentry_trampoline byte +var threadentry_trampolineABI0 = &x_threadentry_trampoline + +//go:nosplit +func threadentry(v unsafe.Pointer) unsafe.Pointer { + ts := *(*ThreadStart)(v) + free(v) + + setg_trampoline(setg_func, uintptr(unsafe.Pointer(ts.g))) + + // faking funcs in go is a bit a... involved - but the following works :) + fn := uintptr(unsafe.Pointer(&ts.fn)) + (*(*func())(unsafe.Pointer(&fn)))() + + return nil +} + +// here we will store a pointer to the provided setg func +var setg_func uintptr + +//go:nosplit +func x_cgo_init(g *G, setg uintptr) { + var size size_t + var attr *pthread_attr_t + + /* The memory sanitizer distributed with versions of clang + before 3.8 has a bug: if you call mmap before malloc, mmap + may return an address that is later overwritten by the msan + library. Avoid this problem by forcing a call to malloc + here, before we ever call malloc. + + This is only required for the memory sanitizer, so it's + unfortunate that we always run it. It should be possible + to remove this when we no longer care about versions of + clang before 3.8. The test for this is + misc/cgo/testsanitizers. + + GCC works hard to eliminate a seemingly unnecessary call to + malloc, so we actually use the memory we allocate. */ + + setg_func = setg + attr = (*pthread_attr_t)(malloc(unsafe.Sizeof(*attr))) + if attr == nil { + println("fakecgo: malloc failed") + abort() + } + pthread_attr_init(attr) + pthread_attr_getstacksize(attr, &size) + // runtime/cgo uses __builtin_frame_address(0) instead of `uintptr(unsafe.Pointer(&size))` + // but this should be OK since we are taking the address of the first variable in this function. + g.stacklo = uintptr(unsafe.Pointer(&size)) - uintptr(size) + 4096 + pthread_attr_destroy(attr) + free(unsafe.Pointer(attr)) +} diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/go_freebsd_arm64.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_freebsd_arm64.go new file mode 100644 index 00000000000..e3a060b9350 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_freebsd_arm64.go @@ -0,0 +1,98 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !cgo + +package fakecgo + +import "unsafe" + +//go:nosplit +func _cgo_sys_thread_start(ts *ThreadStart) { + var attr pthread_attr_t + var ign, oset sigset_t + var p pthread_t + var size size_t + var err int + + // fprintf(stderr, "runtime/cgo: _cgo_sys_thread_start: fn=%p, g=%p\n", ts->fn, ts->g); // debug + sigfillset(&ign) + pthread_sigmask(SIG_SETMASK, &ign, &oset) + + pthread_attr_init(&attr) + pthread_attr_getstacksize(&attr, &size) + // Leave stacklo=0 and set stackhi=size; mstart will do the rest. + ts.g.stackhi = uintptr(size) + + err = _cgo_try_pthread_create(&p, &attr, unsafe.Pointer(threadentry_trampolineABI0), ts) + + pthread_sigmask(SIG_SETMASK, &oset, nil) + + if err != 0 { + print("fakecgo: pthread_create failed: ") + println(err) + abort() + } +} + +// threadentry_trampolineABI0 maps the C ABI to Go ABI then calls the Go function +// +//go:linkname x_threadentry_trampoline threadentry_trampoline +var x_threadentry_trampoline byte +var threadentry_trampolineABI0 = &x_threadentry_trampoline + +//go:nosplit +func threadentry(v unsafe.Pointer) unsafe.Pointer { + ts := *(*ThreadStart)(v) + free(v) + + setg_trampoline(setg_func, uintptr(unsafe.Pointer(ts.g))) + + // faking funcs in go is a bit a... involved - but the following works :) + fn := uintptr(unsafe.Pointer(&ts.fn)) + (*(*func())(unsafe.Pointer(&fn)))() + + return nil +} + +// here we will store a pointer to the provided setg func +var setg_func uintptr + +// x_cgo_init(G *g, void (*setg)(void*)) (runtime/cgo/gcc_linux_amd64.c) +// This get's called during startup, adjusts stacklo, and provides a pointer to setg_gcc for us +// Additionally, if we set _cgo_init to non-null, go won't do it's own TLS setup +// This function can't be go:systemstack since go is not in a state where the systemcheck would work. +// +//go:nosplit +func x_cgo_init(g *G, setg uintptr) { + var size size_t + var attr *pthread_attr_t + + /* The memory sanitizer distributed with versions of clang + before 3.8 has a bug: if you call mmap before malloc, mmap + may return an address that is later overwritten by the msan + library. Avoid this problem by forcing a call to malloc + here, before we ever call malloc. + + This is only required for the memory sanitizer, so it's + unfortunate that we always run it. It should be possible + to remove this when we no longer care about versions of + clang before 3.8. The test for this is + misc/cgo/testsanitizers. + + GCC works hard to eliminate a seemingly unnecessary call to + malloc, so we actually use the memory we allocate. */ + + setg_func = setg + attr = (*pthread_attr_t)(malloc(unsafe.Sizeof(*attr))) + if attr == nil { + println("fakecgo: malloc failed") + abort() + } + pthread_attr_init(attr) + pthread_attr_getstacksize(attr, &size) + g.stacklo = uintptr(unsafe.Pointer(&size)) - uintptr(size) + 4096 + pthread_attr_destroy(attr) + free(unsafe.Pointer(attr)) +} diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/go_libinit.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_libinit.go new file mode 100644 index 00000000000..e5cb46be455 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_libinit.go @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo && (darwin || freebsd || linux) + +package fakecgo + +import ( + "syscall" + "unsafe" +) + +var ( + pthread_g pthread_key_t + + runtime_init_cond = PTHREAD_COND_INITIALIZER + runtime_init_mu = PTHREAD_MUTEX_INITIALIZER + runtime_init_done int +) + +//go:nosplit +func x_cgo_notify_runtime_init_done() { + pthread_mutex_lock(&runtime_init_mu) + runtime_init_done = 1 + pthread_cond_broadcast(&runtime_init_cond) + pthread_mutex_unlock(&runtime_init_mu) +} + +// Store the g into a thread-specific value associated with the pthread key pthread_g. +// And pthread_key_destructor will dropm when the thread is exiting. +func x_cgo_bindm(g unsafe.Pointer) { + // We assume this will always succeed, otherwise, there might be extra M leaking, + // when a C thread exits after a cgo call. + // We only invoke this function once per thread in runtime.needAndBindM, + // and the next calls just reuse the bound m. + pthread_setspecific(pthread_g, g) +} + +// _cgo_try_pthread_create retries pthread_create if it fails with +// EAGAIN. +// +//go:nosplit +//go:norace +func _cgo_try_pthread_create(thread *pthread_t, attr *pthread_attr_t, pfn unsafe.Pointer, arg *ThreadStart) int { + var ts syscall.Timespec + // tries needs to be the same type as syscall.Timespec.Nsec + // but the fields are int32 on 32bit and int64 on 64bit. + // tries is assigned to syscall.Timespec.Nsec in order to match its type. + tries := ts.Nsec + var err int + + for tries = 0; tries < 20; tries++ { + err = int(pthread_create(thread, attr, pfn, unsafe.Pointer(arg))) + if err == 0 { + pthread_detach(*thread) + return 0 + } + if err != int(syscall.EAGAIN) { + return err + } + ts.Sec = 0 + ts.Nsec = (tries + 1) * 1000 * 1000 // Milliseconds. + nanosleep(&ts, nil) + } + return int(syscall.EAGAIN) +} diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/go_linux_amd64.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_linux_amd64.go new file mode 100644 index 00000000000..c9ff7156a89 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_linux_amd64.go @@ -0,0 +1,95 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !cgo + +package fakecgo + +import "unsafe" + +//go:nosplit +func _cgo_sys_thread_start(ts *ThreadStart) { + var attr pthread_attr_t + var ign, oset sigset_t + var p pthread_t + var size size_t + var err int + + //fprintf(stderr, "runtime/cgo: _cgo_sys_thread_start: fn=%p, g=%p\n", ts->fn, ts->g); // debug + sigfillset(&ign) + pthread_sigmask(SIG_SETMASK, &ign, &oset) + + pthread_attr_init(&attr) + pthread_attr_getstacksize(&attr, &size) + // Leave stacklo=0 and set stackhi=size; mstart will do the rest. + ts.g.stackhi = uintptr(size) + + err = _cgo_try_pthread_create(&p, &attr, unsafe.Pointer(threadentry_trampolineABI0), ts) + + pthread_sigmask(SIG_SETMASK, &oset, nil) + + if err != 0 { + print("fakecgo: pthread_create failed: ") + println(err) + abort() + } +} + +// threadentry_trampolineABI0 maps the C ABI to Go ABI then calls the Go function +// +//go:linkname x_threadentry_trampoline threadentry_trampoline +var x_threadentry_trampoline byte +var threadentry_trampolineABI0 = &x_threadentry_trampoline + +//go:nosplit +func threadentry(v unsafe.Pointer) unsafe.Pointer { + ts := *(*ThreadStart)(v) + free(v) + + setg_trampoline(setg_func, uintptr(unsafe.Pointer(ts.g))) + + // faking funcs in go is a bit a... involved - but the following works :) + fn := uintptr(unsafe.Pointer(&ts.fn)) + (*(*func())(unsafe.Pointer(&fn)))() + + return nil +} + +// here we will store a pointer to the provided setg func +var setg_func uintptr + +//go:nosplit +func x_cgo_init(g *G, setg uintptr) { + var size size_t + var attr *pthread_attr_t + + /* The memory sanitizer distributed with versions of clang + before 3.8 has a bug: if you call mmap before malloc, mmap + may return an address that is later overwritten by the msan + library. Avoid this problem by forcing a call to malloc + here, before we ever call malloc. + + This is only required for the memory sanitizer, so it's + unfortunate that we always run it. It should be possible + to remove this when we no longer care about versions of + clang before 3.8. The test for this is + misc/cgo/testsanitizers. + + GCC works hard to eliminate a seemingly unnecessary call to + malloc, so we actually use the memory we allocate. */ + + setg_func = setg + attr = (*pthread_attr_t)(malloc(unsafe.Sizeof(*attr))) + if attr == nil { + println("fakecgo: malloc failed") + abort() + } + pthread_attr_init(attr) + pthread_attr_getstacksize(attr, &size) + // runtime/cgo uses __builtin_frame_address(0) instead of `uintptr(unsafe.Pointer(&size))` + // but this should be OK since we are taking the address of the first variable in this function. + g.stacklo = uintptr(unsafe.Pointer(&size)) - uintptr(size) + 4096 + pthread_attr_destroy(attr) + free(unsafe.Pointer(attr)) +} diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/go_linux_arm64.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_linux_arm64.go new file mode 100644 index 00000000000..a3b1cca59a0 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_linux_arm64.go @@ -0,0 +1,98 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !cgo + +package fakecgo + +import "unsafe" + +//go:nosplit +func _cgo_sys_thread_start(ts *ThreadStart) { + var attr pthread_attr_t + var ign, oset sigset_t + var p pthread_t + var size size_t + var err int + + //fprintf(stderr, "runtime/cgo: _cgo_sys_thread_start: fn=%p, g=%p\n", ts->fn, ts->g); // debug + sigfillset(&ign) + pthread_sigmask(SIG_SETMASK, &ign, &oset) + + pthread_attr_init(&attr) + pthread_attr_getstacksize(&attr, &size) + // Leave stacklo=0 and set stackhi=size; mstart will do the rest. + ts.g.stackhi = uintptr(size) + + err = _cgo_try_pthread_create(&p, &attr, unsafe.Pointer(threadentry_trampolineABI0), ts) + + pthread_sigmask(SIG_SETMASK, &oset, nil) + + if err != 0 { + print("fakecgo: pthread_create failed: ") + println(err) + abort() + } +} + +// threadentry_trampolineABI0 maps the C ABI to Go ABI then calls the Go function +// +//go:linkname x_threadentry_trampoline threadentry_trampoline +var x_threadentry_trampoline byte +var threadentry_trampolineABI0 = &x_threadentry_trampoline + +//go:nosplit +func threadentry(v unsafe.Pointer) unsafe.Pointer { + ts := *(*ThreadStart)(v) + free(v) + + setg_trampoline(setg_func, uintptr(unsafe.Pointer(ts.g))) + + // faking funcs in go is a bit a... involved - but the following works :) + fn := uintptr(unsafe.Pointer(&ts.fn)) + (*(*func())(unsafe.Pointer(&fn)))() + + return nil +} + +// here we will store a pointer to the provided setg func +var setg_func uintptr + +// x_cgo_init(G *g, void (*setg)(void*)) (runtime/cgo/gcc_linux_amd64.c) +// This get's called during startup, adjusts stacklo, and provides a pointer to setg_gcc for us +// Additionally, if we set _cgo_init to non-null, go won't do it's own TLS setup +// This function can't be go:systemstack since go is not in a state where the systemcheck would work. +// +//go:nosplit +func x_cgo_init(g *G, setg uintptr) { + var size size_t + var attr *pthread_attr_t + + /* The memory sanitizer distributed with versions of clang + before 3.8 has a bug: if you call mmap before malloc, mmap + may return an address that is later overwritten by the msan + library. Avoid this problem by forcing a call to malloc + here, before we ever call malloc. + + This is only required for the memory sanitizer, so it's + unfortunate that we always run it. It should be possible + to remove this when we no longer care about versions of + clang before 3.8. The test for this is + misc/cgo/testsanitizers. + + GCC works hard to eliminate a seemingly unnecessary call to + malloc, so we actually use the memory we allocate. */ + + setg_func = setg + attr = (*pthread_attr_t)(malloc(unsafe.Sizeof(*attr))) + if attr == nil { + println("fakecgo: malloc failed") + abort() + } + pthread_attr_init(attr) + pthread_attr_getstacksize(attr, &size) + g.stacklo = uintptr(unsafe.Pointer(&size)) - uintptr(size) + 4096 + pthread_attr_destroy(attr) + free(unsafe.Pointer(attr)) +} diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/go_setenv.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_setenv.go new file mode 100644 index 00000000000..e42d84f0b75 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_setenv.go @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo && (darwin || freebsd || linux) + +package fakecgo + +//go:nosplit +//go:norace +func x_cgo_setenv(arg *[2]*byte) { + setenv(arg[0], arg[1], 1) +} + +//go:nosplit +//go:norace +func x_cgo_unsetenv(arg *[1]*byte) { + unsetenv(arg[0]) +} diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/go_util.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_util.go new file mode 100644 index 00000000000..0ac10d1f157 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/go_util.go @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo && (darwin || freebsd || linux) + +package fakecgo + +import "unsafe" + +// _cgo_thread_start is split into three parts in cgo since only one part is system dependent (keep it here for easier handling) + +// _cgo_thread_start(ThreadStart *arg) (runtime/cgo/gcc_util.c) +// This get's called instead of the go code for creating new threads +// -> pthread_* stuff is used, so threads are setup correctly for C +// If this is missing, TLS is only setup correctly on thread 1! +// This function should be go:systemstack instead of go:nosplit (but that requires runtime) +// +//go:nosplit +//go:norace +func x_cgo_thread_start(arg *ThreadStart) { + var ts *ThreadStart + // Make our own copy that can persist after we return. + // _cgo_tsan_acquire(); + ts = (*ThreadStart)(malloc(unsafe.Sizeof(*ts))) + // _cgo_tsan_release(); + if ts == nil { + println("fakecgo: out of memory in thread_start") + abort() + } + // *ts = *arg would cause a writebarrier so copy using slices + s1 := unsafe.Slice((*uintptr)(unsafe.Pointer(ts)), unsafe.Sizeof(*ts)/8) + s2 := unsafe.Slice((*uintptr)(unsafe.Pointer(arg)), unsafe.Sizeof(*arg)/8) + for i := range s2 { + s1[i] = s2[i] + } + _cgo_sys_thread_start(ts) // OS-dependent half +} diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/iscgo.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/iscgo.go new file mode 100644 index 00000000000..28af41cc640 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/iscgo.go @@ -0,0 +1,19 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !cgo && (darwin || freebsd || linux) + +// The runtime package contains an uninitialized definition +// for runtime·iscgo. Override it to tell the runtime we're here. +// There are various function pointers that should be set too, +// but those depend on dynamic linker magic to get initialized +// correctly, and sometimes they break. This variable is a +// backup: it depends only on old C style static linking rules. + +package fakecgo + +import _ "unsafe" // for go:linkname + +//go:linkname _iscgo runtime.iscgo +var _iscgo bool = true diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo.go new file mode 100644 index 00000000000..74626c64a0e --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo.go @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo && (darwin || freebsd || linux) + +package fakecgo + +type ( + size_t uintptr + sigset_t [128]byte + pthread_attr_t [64]byte + pthread_t int + pthread_key_t uint64 +) + +// for pthread_sigmask: + +type sighow int32 + +const ( + SIG_BLOCK sighow = 0 + SIG_UNBLOCK sighow = 1 + SIG_SETMASK sighow = 2 +) + +type G struct { + stacklo uintptr + stackhi uintptr +} + +type ThreadStart struct { + g *G + tls *uintptr + fn uintptr +} diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo_darwin.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo_darwin.go new file mode 100644 index 00000000000..af148333f6d --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo_darwin.go @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo + +package fakecgo + +type ( + pthread_mutex_t struct { + sig int64 + opaque [56]byte + } + pthread_cond_t struct { + sig int64 + opaque [40]byte + } +) + +var ( + PTHREAD_COND_INITIALIZER = pthread_cond_t{sig: 0x3CB0B1BB} + PTHREAD_MUTEX_INITIALIZER = pthread_mutex_t{sig: 0x32AAABA7} +) diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo_freebsd.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo_freebsd.go new file mode 100644 index 00000000000..ca1f722c939 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo_freebsd.go @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo + +package fakecgo + +type ( + pthread_cond_t uintptr + pthread_mutex_t uintptr +) + +var ( + PTHREAD_COND_INITIALIZER = pthread_cond_t(0) + PTHREAD_MUTEX_INITIALIZER = pthread_mutex_t(0) +) diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo_linux.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo_linux.go new file mode 100644 index 00000000000..c4b6e9ea5a4 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/libcgo_linux.go @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo + +package fakecgo + +type ( + pthread_cond_t [48]byte + pthread_mutex_t [48]byte +) + +var ( + PTHREAD_COND_INITIALIZER = pthread_cond_t{} + PTHREAD_MUTEX_INITIALIZER = pthread_mutex_t{} +) diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/setenv.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/setenv.go new file mode 100644 index 00000000000..f30af0e1515 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/setenv.go @@ -0,0 +1,19 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !cgo && (darwin || freebsd || linux) + +package fakecgo + +import _ "unsafe" // for go:linkname + +//go:linkname x_cgo_setenv_trampoline x_cgo_setenv_trampoline +//go:linkname _cgo_setenv runtime._cgo_setenv +var x_cgo_setenv_trampoline byte +var _cgo_setenv = &x_cgo_setenv_trampoline + +//go:linkname x_cgo_unsetenv_trampoline x_cgo_unsetenv_trampoline +//go:linkname _cgo_unsetenv runtime._cgo_unsetenv +var x_cgo_unsetenv_trampoline byte +var _cgo_unsetenv = &x_cgo_unsetenv_trampoline diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols.go new file mode 100644 index 00000000000..3d19fd822a7 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols.go @@ -0,0 +1,181 @@ +// Code generated by 'go generate' with gen.go. DO NOT EDIT. + +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo && (darwin || freebsd || linux) + +package fakecgo + +import ( + "syscall" + "unsafe" +) + +// setg_trampoline calls setg with the G provided +func setg_trampoline(setg uintptr, G uintptr) + +// call5 takes fn the C function and 5 arguments and calls the function with those arguments +func call5(fn, a1, a2, a3, a4, a5 uintptr) uintptr + +func malloc(size uintptr) unsafe.Pointer { + ret := call5(mallocABI0, uintptr(size), 0, 0, 0, 0) + // this indirection is to avoid go vet complaining about possible misuse of unsafe.Pointer + return *(*unsafe.Pointer)(unsafe.Pointer(&ret)) +} + +func free(ptr unsafe.Pointer) { + call5(freeABI0, uintptr(ptr), 0, 0, 0, 0) +} + +func setenv(name *byte, value *byte, overwrite int32) int32 { + return int32(call5(setenvABI0, uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(value)), uintptr(overwrite), 0, 0)) +} + +func unsetenv(name *byte) int32 { + return int32(call5(unsetenvABI0, uintptr(unsafe.Pointer(name)), 0, 0, 0, 0)) +} + +func sigfillset(set *sigset_t) int32 { + return int32(call5(sigfillsetABI0, uintptr(unsafe.Pointer(set)), 0, 0, 0, 0)) +} + +func nanosleep(ts *syscall.Timespec, rem *syscall.Timespec) int32 { + return int32(call5(nanosleepABI0, uintptr(unsafe.Pointer(ts)), uintptr(unsafe.Pointer(rem)), 0, 0, 0)) +} + +func abort() { + call5(abortABI0, 0, 0, 0, 0, 0) +} + +func pthread_attr_init(attr *pthread_attr_t) int32 { + return int32(call5(pthread_attr_initABI0, uintptr(unsafe.Pointer(attr)), 0, 0, 0, 0)) +} + +func pthread_create(thread *pthread_t, attr *pthread_attr_t, start unsafe.Pointer, arg unsafe.Pointer) int32 { + return int32(call5(pthread_createABI0, uintptr(unsafe.Pointer(thread)), uintptr(unsafe.Pointer(attr)), uintptr(start), uintptr(arg), 0)) +} + +func pthread_detach(thread pthread_t) int32 { + return int32(call5(pthread_detachABI0, uintptr(thread), 0, 0, 0, 0)) +} + +func pthread_sigmask(how sighow, ign *sigset_t, oset *sigset_t) int32 { + return int32(call5(pthread_sigmaskABI0, uintptr(how), uintptr(unsafe.Pointer(ign)), uintptr(unsafe.Pointer(oset)), 0, 0)) +} + +func pthread_self() pthread_t { + return pthread_t(call5(pthread_selfABI0, 0, 0, 0, 0, 0)) +} + +func pthread_get_stacksize_np(thread pthread_t) size_t { + return size_t(call5(pthread_get_stacksize_npABI0, uintptr(thread), 0, 0, 0, 0)) +} + +func pthread_attr_getstacksize(attr *pthread_attr_t, stacksize *size_t) int32 { + return int32(call5(pthread_attr_getstacksizeABI0, uintptr(unsafe.Pointer(attr)), uintptr(unsafe.Pointer(stacksize)), 0, 0, 0)) +} + +func pthread_attr_setstacksize(attr *pthread_attr_t, size size_t) int32 { + return int32(call5(pthread_attr_setstacksizeABI0, uintptr(unsafe.Pointer(attr)), uintptr(size), 0, 0, 0)) +} + +func pthread_attr_destroy(attr *pthread_attr_t) int32 { + return int32(call5(pthread_attr_destroyABI0, uintptr(unsafe.Pointer(attr)), 0, 0, 0, 0)) +} + +func pthread_mutex_lock(mutex *pthread_mutex_t) int32 { + return int32(call5(pthread_mutex_lockABI0, uintptr(unsafe.Pointer(mutex)), 0, 0, 0, 0)) +} + +func pthread_mutex_unlock(mutex *pthread_mutex_t) int32 { + return int32(call5(pthread_mutex_unlockABI0, uintptr(unsafe.Pointer(mutex)), 0, 0, 0, 0)) +} + +func pthread_cond_broadcast(cond *pthread_cond_t) int32 { + return int32(call5(pthread_cond_broadcastABI0, uintptr(unsafe.Pointer(cond)), 0, 0, 0, 0)) +} + +func pthread_setspecific(key pthread_key_t, value unsafe.Pointer) int32 { + return int32(call5(pthread_setspecificABI0, uintptr(key), uintptr(value), 0, 0, 0)) +} + +//go:linkname _malloc _malloc +var _malloc uintptr +var mallocABI0 = uintptr(unsafe.Pointer(&_malloc)) + +//go:linkname _free _free +var _free uintptr +var freeABI0 = uintptr(unsafe.Pointer(&_free)) + +//go:linkname _setenv _setenv +var _setenv uintptr +var setenvABI0 = uintptr(unsafe.Pointer(&_setenv)) + +//go:linkname _unsetenv _unsetenv +var _unsetenv uintptr +var unsetenvABI0 = uintptr(unsafe.Pointer(&_unsetenv)) + +//go:linkname _sigfillset _sigfillset +var _sigfillset uintptr +var sigfillsetABI0 = uintptr(unsafe.Pointer(&_sigfillset)) + +//go:linkname _nanosleep _nanosleep +var _nanosleep uintptr +var nanosleepABI0 = uintptr(unsafe.Pointer(&_nanosleep)) + +//go:linkname _abort _abort +var _abort uintptr +var abortABI0 = uintptr(unsafe.Pointer(&_abort)) + +//go:linkname _pthread_attr_init _pthread_attr_init +var _pthread_attr_init uintptr +var pthread_attr_initABI0 = uintptr(unsafe.Pointer(&_pthread_attr_init)) + +//go:linkname _pthread_create _pthread_create +var _pthread_create uintptr +var pthread_createABI0 = uintptr(unsafe.Pointer(&_pthread_create)) + +//go:linkname _pthread_detach _pthread_detach +var _pthread_detach uintptr +var pthread_detachABI0 = uintptr(unsafe.Pointer(&_pthread_detach)) + +//go:linkname _pthread_sigmask _pthread_sigmask +var _pthread_sigmask uintptr +var pthread_sigmaskABI0 = uintptr(unsafe.Pointer(&_pthread_sigmask)) + +//go:linkname _pthread_self _pthread_self +var _pthread_self uintptr +var pthread_selfABI0 = uintptr(unsafe.Pointer(&_pthread_self)) + +//go:linkname _pthread_get_stacksize_np _pthread_get_stacksize_np +var _pthread_get_stacksize_np uintptr +var pthread_get_stacksize_npABI0 = uintptr(unsafe.Pointer(&_pthread_get_stacksize_np)) + +//go:linkname _pthread_attr_getstacksize _pthread_attr_getstacksize +var _pthread_attr_getstacksize uintptr +var pthread_attr_getstacksizeABI0 = uintptr(unsafe.Pointer(&_pthread_attr_getstacksize)) + +//go:linkname _pthread_attr_setstacksize _pthread_attr_setstacksize +var _pthread_attr_setstacksize uintptr +var pthread_attr_setstacksizeABI0 = uintptr(unsafe.Pointer(&_pthread_attr_setstacksize)) + +//go:linkname _pthread_attr_destroy _pthread_attr_destroy +var _pthread_attr_destroy uintptr +var pthread_attr_destroyABI0 = uintptr(unsafe.Pointer(&_pthread_attr_destroy)) + +//go:linkname _pthread_mutex_lock _pthread_mutex_lock +var _pthread_mutex_lock uintptr +var pthread_mutex_lockABI0 = uintptr(unsafe.Pointer(&_pthread_mutex_lock)) + +//go:linkname _pthread_mutex_unlock _pthread_mutex_unlock +var _pthread_mutex_unlock uintptr +var pthread_mutex_unlockABI0 = uintptr(unsafe.Pointer(&_pthread_mutex_unlock)) + +//go:linkname _pthread_cond_broadcast _pthread_cond_broadcast +var _pthread_cond_broadcast uintptr +var pthread_cond_broadcastABI0 = uintptr(unsafe.Pointer(&_pthread_cond_broadcast)) + +//go:linkname _pthread_setspecific _pthread_setspecific +var _pthread_setspecific uintptr +var pthread_setspecificABI0 = uintptr(unsafe.Pointer(&_pthread_setspecific)) diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols_darwin.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols_darwin.go new file mode 100644 index 00000000000..54aaa46285c --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols_darwin.go @@ -0,0 +1,29 @@ +// Code generated by 'go generate' with gen.go. DO NOT EDIT. + +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo + +package fakecgo + +//go:cgo_import_dynamic purego_malloc malloc "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_free free "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_setenv setenv "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_unsetenv unsetenv "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_sigfillset sigfillset "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_nanosleep nanosleep "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_abort abort "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_attr_init pthread_attr_init "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_create pthread_create "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_detach pthread_detach "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_sigmask pthread_sigmask "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_self pthread_self "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_get_stacksize_np pthread_get_stacksize_np "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_attr_getstacksize pthread_attr_getstacksize "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_attr_setstacksize pthread_attr_setstacksize "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_attr_destroy pthread_attr_destroy "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_mutex_lock pthread_mutex_lock "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_mutex_unlock pthread_mutex_unlock "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_cond_broadcast pthread_cond_broadcast "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic purego_pthread_setspecific pthread_setspecific "/usr/lib/libSystem.B.dylib" diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols_freebsd.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols_freebsd.go new file mode 100644 index 00000000000..81538119799 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols_freebsd.go @@ -0,0 +1,29 @@ +// Code generated by 'go generate' with gen.go. DO NOT EDIT. + +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo + +package fakecgo + +//go:cgo_import_dynamic purego_malloc malloc "libc.so.7" +//go:cgo_import_dynamic purego_free free "libc.so.7" +//go:cgo_import_dynamic purego_setenv setenv "libc.so.7" +//go:cgo_import_dynamic purego_unsetenv unsetenv "libc.so.7" +//go:cgo_import_dynamic purego_sigfillset sigfillset "libc.so.7" +//go:cgo_import_dynamic purego_nanosleep nanosleep "libc.so.7" +//go:cgo_import_dynamic purego_abort abort "libc.so.7" +//go:cgo_import_dynamic purego_pthread_attr_init pthread_attr_init "libpthread.so" +//go:cgo_import_dynamic purego_pthread_create pthread_create "libpthread.so" +//go:cgo_import_dynamic purego_pthread_detach pthread_detach "libpthread.so" +//go:cgo_import_dynamic purego_pthread_sigmask pthread_sigmask "libpthread.so" +//go:cgo_import_dynamic purego_pthread_self pthread_self "libpthread.so" +//go:cgo_import_dynamic purego_pthread_get_stacksize_np pthread_get_stacksize_np "libpthread.so" +//go:cgo_import_dynamic purego_pthread_attr_getstacksize pthread_attr_getstacksize "libpthread.so" +//go:cgo_import_dynamic purego_pthread_attr_setstacksize pthread_attr_setstacksize "libpthread.so" +//go:cgo_import_dynamic purego_pthread_attr_destroy pthread_attr_destroy "libpthread.so" +//go:cgo_import_dynamic purego_pthread_mutex_lock pthread_mutex_lock "libpthread.so" +//go:cgo_import_dynamic purego_pthread_mutex_unlock pthread_mutex_unlock "libpthread.so" +//go:cgo_import_dynamic purego_pthread_cond_broadcast pthread_cond_broadcast "libpthread.so" +//go:cgo_import_dynamic purego_pthread_setspecific pthread_setspecific "libpthread.so" diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols_linux.go b/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols_linux.go new file mode 100644 index 00000000000..180057d0156 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/symbols_linux.go @@ -0,0 +1,29 @@ +// Code generated by 'go generate' with gen.go. DO NOT EDIT. + +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo + +package fakecgo + +//go:cgo_import_dynamic purego_malloc malloc "libc.so.6" +//go:cgo_import_dynamic purego_free free "libc.so.6" +//go:cgo_import_dynamic purego_setenv setenv "libc.so.6" +//go:cgo_import_dynamic purego_unsetenv unsetenv "libc.so.6" +//go:cgo_import_dynamic purego_sigfillset sigfillset "libc.so.6" +//go:cgo_import_dynamic purego_nanosleep nanosleep "libc.so.6" +//go:cgo_import_dynamic purego_abort abort "libc.so.6" +//go:cgo_import_dynamic purego_pthread_attr_init pthread_attr_init "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_create pthread_create "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_detach pthread_detach "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_sigmask pthread_sigmask "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_self pthread_self "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_get_stacksize_np pthread_get_stacksize_np "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_attr_getstacksize pthread_attr_getstacksize "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_attr_setstacksize pthread_attr_setstacksize "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_attr_destroy pthread_attr_destroy "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_mutex_lock pthread_mutex_lock "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_mutex_unlock pthread_mutex_unlock "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_cond_broadcast pthread_cond_broadcast "libpthread.so.0" +//go:cgo_import_dynamic purego_pthread_setspecific pthread_setspecific "libpthread.so.0" diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/trampolines_amd64.s b/vendor/github.com/ebitengine/purego/internal/fakecgo/trampolines_amd64.s new file mode 100644 index 00000000000..c9a3cc09eb3 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/trampolines_amd64.s @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo && (darwin || linux || freebsd) + +/* +trampoline for emulating required C functions for cgo in go (see cgo.go) +(we convert cdecl calling convention to go and vice-versa) + +Since we're called from go and call into C we can cheat a bit with the calling conventions: + - in go all the registers are caller saved + - in C we have a couple of callee saved registers + +=> we can use BX, R12, R13, R14, R15 instead of the stack + +C Calling convention cdecl used here (we only need integer args): +1. arg: DI +2. arg: SI +3. arg: DX +4. arg: CX +5. arg: R8 +6. arg: R9 +We don't need floats with these functions -> AX=0 +return value will be in AX +*/ +#include "textflag.h" +#include "go_asm.h" + +// these trampolines map the gcc ABI to Go ABI and then calls into the Go equivalent functions. + +TEXT x_cgo_init_trampoline(SB), NOSPLIT, $16 + MOVQ DI, AX + MOVQ SI, BX + MOVQ ·x_cgo_init_call(SB), DX + MOVQ (DX), CX + CALL CX + RET + +TEXT x_cgo_thread_start_trampoline(SB), NOSPLIT, $8 + MOVQ DI, AX + MOVQ ·x_cgo_thread_start_call(SB), DX + MOVQ (DX), CX + CALL CX + RET + +TEXT x_cgo_setenv_trampoline(SB), NOSPLIT, $8 + MOVQ DI, AX + MOVQ ·x_cgo_setenv_call(SB), DX + MOVQ (DX), CX + CALL CX + RET + +TEXT x_cgo_unsetenv_trampoline(SB), NOSPLIT, $8 + MOVQ DI, AX + MOVQ ·x_cgo_unsetenv_call(SB), DX + MOVQ (DX), CX + CALL CX + RET + +TEXT x_cgo_notify_runtime_init_done_trampoline(SB), NOSPLIT, $0 + CALL ·x_cgo_notify_runtime_init_done(SB) + RET + +TEXT x_cgo_bindm_trampoline(SB), NOSPLIT, $0 + CALL ·x_cgo_bindm(SB) + RET + +// func setg_trampoline(setg uintptr, g uintptr) +TEXT ·setg_trampoline(SB), NOSPLIT, $0-16 + MOVQ G+8(FP), DI + MOVQ setg+0(FP), BX + XORL AX, AX + CALL BX + RET + +TEXT threadentry_trampoline(SB), NOSPLIT, $16 + MOVQ DI, AX + MOVQ ·threadentry_call(SB), DX + MOVQ (DX), CX + CALL CX + RET + +TEXT ·call5(SB), NOSPLIT, $0-56 + MOVQ fn+0(FP), BX + MOVQ a1+8(FP), DI + MOVQ a2+16(FP), SI + MOVQ a3+24(FP), DX + MOVQ a4+32(FP), CX + MOVQ a5+40(FP), R8 + + XORL AX, AX // no floats + + PUSHQ BP // save BP + MOVQ SP, BP // save SP inside BP bc BP is callee-saved + SUBQ $16, SP // allocate space for alignment + ANDQ $-16, SP // align on 16 bytes for SSE + + CALL BX + + MOVQ BP, SP // get SP back + POPQ BP // restore BP + + MOVQ AX, ret+48(FP) + RET diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/trampolines_arm64.s b/vendor/github.com/ebitengine/purego/internal/fakecgo/trampolines_arm64.s new file mode 100644 index 00000000000..9dbdbc0139d --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/trampolines_arm64.s @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo && (darwin || freebsd || linux) + +#include "textflag.h" +#include "go_asm.h" + +// these trampolines map the gcc ABI to Go ABI and then calls into the Go equivalent functions. + +TEXT x_cgo_init_trampoline(SB), NOSPLIT, $0-0 + MOVD R0, 8(RSP) + MOVD R1, 16(RSP) + MOVD ·x_cgo_init_call(SB), R26 + MOVD (R26), R2 + CALL (R2) + RET + +TEXT x_cgo_thread_start_trampoline(SB), NOSPLIT, $0-0 + MOVD R0, 8(RSP) + MOVD ·x_cgo_thread_start_call(SB), R26 + MOVD (R26), R2 + CALL (R2) + RET + +TEXT x_cgo_setenv_trampoline(SB), NOSPLIT, $0-0 + MOVD R0, 8(RSP) + MOVD ·x_cgo_setenv_call(SB), R26 + MOVD (R26), R2 + CALL (R2) + RET + +TEXT x_cgo_unsetenv_trampoline(SB), NOSPLIT, $0-0 + MOVD R0, 8(RSP) + MOVD ·x_cgo_unsetenv_call(SB), R26 + MOVD (R26), R2 + CALL (R2) + RET + +TEXT x_cgo_notify_runtime_init_done_trampoline(SB), NOSPLIT, $0-0 + CALL ·x_cgo_notify_runtime_init_done(SB) + RET + +TEXT x_cgo_bindm_trampoline(SB), NOSPLIT, $0 + CALL ·x_cgo_bindm(SB) + RET + +// func setg_trampoline(setg uintptr, g uintptr) +TEXT ·setg_trampoline(SB), NOSPLIT, $0-16 + MOVD G+8(FP), R0 + MOVD setg+0(FP), R1 + CALL R1 + RET + +TEXT threadentry_trampoline(SB), NOSPLIT, $0-0 + MOVD R0, 8(RSP) + MOVD ·threadentry_call(SB), R26 + MOVD (R26), R2 + CALL (R2) + MOVD $0, R0 // TODO: get the return value from threadentry + RET + +TEXT ·call5(SB), NOSPLIT, $0-0 + MOVD fn+0(FP), R6 + MOVD a1+8(FP), R0 + MOVD a2+16(FP), R1 + MOVD a3+24(FP), R2 + MOVD a4+32(FP), R3 + MOVD a5+40(FP), R4 + CALL R6 + MOVD R0, ret+48(FP) + RET diff --git a/vendor/github.com/ebitengine/purego/internal/fakecgo/trampolines_stubs.s b/vendor/github.com/ebitengine/purego/internal/fakecgo/trampolines_stubs.s new file mode 100644 index 00000000000..a65b2012c1b --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/fakecgo/trampolines_stubs.s @@ -0,0 +1,90 @@ +// Code generated by 'go generate' with gen.go. DO NOT EDIT. + +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo && (darwin || freebsd || linux) + +#include "textflag.h" + +// these stubs are here because it is not possible to go:linkname directly the C functions on darwin arm64 + +TEXT _malloc(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_malloc(SB) + RET + +TEXT _free(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_free(SB) + RET + +TEXT _setenv(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_setenv(SB) + RET + +TEXT _unsetenv(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_unsetenv(SB) + RET + +TEXT _sigfillset(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_sigfillset(SB) + RET + +TEXT _nanosleep(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_nanosleep(SB) + RET + +TEXT _abort(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_abort(SB) + RET + +TEXT _pthread_attr_init(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_attr_init(SB) + RET + +TEXT _pthread_create(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_create(SB) + RET + +TEXT _pthread_detach(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_detach(SB) + RET + +TEXT _pthread_sigmask(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_sigmask(SB) + RET + +TEXT _pthread_self(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_self(SB) + RET + +TEXT _pthread_get_stacksize_np(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_get_stacksize_np(SB) + RET + +TEXT _pthread_attr_getstacksize(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_attr_getstacksize(SB) + RET + +TEXT _pthread_attr_setstacksize(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_attr_setstacksize(SB) + RET + +TEXT _pthread_attr_destroy(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_attr_destroy(SB) + RET + +TEXT _pthread_mutex_lock(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_mutex_lock(SB) + RET + +TEXT _pthread_mutex_unlock(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_mutex_unlock(SB) + RET + +TEXT _pthread_cond_broadcast(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_cond_broadcast(SB) + RET + +TEXT _pthread_setspecific(SB), NOSPLIT|NOFRAME, $0-0 + JMP purego_pthread_setspecific(SB) + RET diff --git a/vendor/github.com/ebitengine/purego/internal/strings/strings.go b/vendor/github.com/ebitengine/purego/internal/strings/strings.go new file mode 100644 index 00000000000..5b0d2522554 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/internal/strings/strings.go @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +package strings + +import ( + "unsafe" +) + +// hasSuffix tests whether the string s ends with suffix. +func hasSuffix(s, suffix string) bool { + return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix +} + +// CString converts a go string to *byte that can be passed to C code. +func CString(name string) *byte { + if hasSuffix(name, "\x00") { + return &(*(*[]byte)(unsafe.Pointer(&name)))[0] + } + b := make([]byte, len(name)+1) + copy(b, name) + return &b[0] +} + +// GoString copies a null-terminated char* to a Go string. +func GoString(c uintptr) string { + // We take the address and then dereference it to trick go vet from creating a possible misuse of unsafe.Pointer + ptr := *(*unsafe.Pointer)(unsafe.Pointer(&c)) + if ptr == nil { + return "" + } + var length int + for { + if *(*byte)(unsafe.Add(ptr, uintptr(length))) == '\x00' { + break + } + length++ + } + return string(unsafe.Slice((*byte)(ptr), length)) +} diff --git a/vendor/github.com/ebitengine/purego/is_ios.go b/vendor/github.com/ebitengine/purego/is_ios.go new file mode 100644 index 00000000000..ed31da97824 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/is_ios.go @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo + +package purego + +// if you are getting this error it means that you have +// CGO_ENABLED=0 while trying to build for ios. +// purego does not support this mode yet. +// the fix is to set CGO_ENABLED=1 which will require +// a C compiler. +var _ = _PUREGO_REQUIRES_CGO_ON_IOS diff --git a/vendor/github.com/ebitengine/purego/nocgo.go b/vendor/github.com/ebitengine/purego/nocgo.go new file mode 100644 index 00000000000..5b989ea814e --- /dev/null +++ b/vendor/github.com/ebitengine/purego/nocgo.go @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build !cgo && (darwin || freebsd || linux) + +package purego + +// if CGO_ENABLED=0 import fakecgo to setup the Cgo runtime correctly. +// This is required since some frameworks need TLS setup the C way which Go doesn't do. +// We currently don't support ios in fakecgo mode so force Cgo or fail +// +// The way that the Cgo runtime (runtime/cgo) works is by setting some variables found +// in runtime with non-null GCC compiled functions. The variables that are replaced are +// var ( +// iscgo bool // in runtime/cgo.go +// _cgo_init unsafe.Pointer // in runtime/cgo.go +// _cgo_thread_start unsafe.Pointer // in runtime/cgo.go +// _cgo_notify_runtime_init_done unsafe.Pointer // in runtime/cgo.go +// _cgo_setenv unsafe.Pointer // in runtime/env_posix.go +// _cgo_unsetenv unsafe.Pointer // in runtime/env_posix.go +// ) +// importing fakecgo will set these (using //go:linkname) with functions written +// entirely in Go (except for some assembly trampolines to change GCC ABI to Go ABI). +// Doing so makes it possible to build applications that call into C without CGO_ENABLED=1. +import _ "github.com/ebitengine/purego/internal/fakecgo" diff --git a/vendor/github.com/ebitengine/purego/struct_amd64.go b/vendor/github.com/ebitengine/purego/struct_amd64.go new file mode 100644 index 00000000000..06a82dd8c5b --- /dev/null +++ b/vendor/github.com/ebitengine/purego/struct_amd64.go @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 The Ebitengine Authors + +package purego + +import ( + "math" + "reflect" + "unsafe" +) + +func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { + outSize := outType.Size() + switch { + case outSize == 0: + return reflect.New(outType).Elem() + case outSize <= 8: + if isAllFloats(outType) { + // 2 float32s or 1 float64s are return in the float register + return reflect.NewAt(outType, unsafe.Pointer(&struct{ a uintptr }{syscall.f1})).Elem() + } + // up to 8 bytes is returned in RAX + return reflect.NewAt(outType, unsafe.Pointer(&struct{ a uintptr }{syscall.a1})).Elem() + case outSize <= 16: + r1, r2 := syscall.a1, syscall.a2 + if isAllFloats(outType) { + r1 = syscall.f1 + r2 = syscall.f2 + } else { + // check first 8 bytes if it's floats + hasFirstFloat := false + f1 := outType.Field(0).Type + if f1.Kind() == reflect.Float64 || f1.Kind() == reflect.Float32 && outType.Field(1).Type.Kind() == reflect.Float32 { + r1 = syscall.f1 + hasFirstFloat = true + } + + // find index of the field that starts the second 8 bytes + var i int + for i = 0; i < outType.NumField(); i++ { + if outType.Field(i).Offset == 8 { + break + } + } + + // check last 8 bytes if they are floats + f1 = outType.Field(i).Type + if f1.Kind() == reflect.Float64 || f1.Kind() == reflect.Float32 && i+1 == outType.NumField() { + r2 = syscall.f1 + } else if hasFirstFloat { + // if the first field was a float then that means the second integer field + // comes from the first integer register + r2 = syscall.a1 + } + } + return reflect.NewAt(outType, unsafe.Pointer(&struct{ a, b uintptr }{r1, r2})).Elem() + default: + // create struct from the Go pointer created above + // weird pointer dereference to circumvent go vet + return reflect.NewAt(outType, *(*unsafe.Pointer)(unsafe.Pointer(&syscall.a1))).Elem() + } +} + +func isAllFloats(ty reflect.Type) bool { + for i := 0; i < ty.NumField(); i++ { + f := ty.Field(i) + switch f.Type.Kind() { + case reflect.Float64, reflect.Float32: + default: + return false + } + } + return true +} + +// https://refspecs.linuxbase.org/elf/x86_64-abi-0.99.pdf +// https://gitlab.com/x86-psABIs/x86-64-ABI +// Class determines where the 8 byte value goes. +// Higher value classes win over lower value classes +const ( + _NO_CLASS = 0b0000 + _SSE = 0b0001 + _X87 = 0b0011 // long double not used in Go + _INTEGER = 0b0111 + _MEMORY = 0b1111 +) + +func addStruct(v reflect.Value, numInts, numFloats, numStack *int, addInt, addFloat, addStack func(uintptr), keepAlive []interface{}) []interface{} { + if v.Type().Size() == 0 { + return keepAlive + } + + // if greater than 64 bytes place on stack + if v.Type().Size() > 8*8 { + placeStack(v, addStack) + return keepAlive + } + var ( + savedNumFloats = *numFloats + savedNumInts = *numInts + savedNumStack = *numStack + ) + placeOnStack := postMerger(v.Type()) || !tryPlaceRegister(v, addFloat, addInt) + if placeOnStack { + // reset any values placed in registers + *numFloats = savedNumFloats + *numInts = savedNumInts + *numStack = savedNumStack + placeStack(v, addStack) + } + return keepAlive +} + +func postMerger(t reflect.Type) bool { + // (c) If the size of the aggregate exceeds two eightbytes and the first eight- byte isn’t SSE or any other + // eightbyte isn’t SSEUP, the whole argument is passed in memory. + if t.Kind() != reflect.Struct { + return false + } + if t.Size() <= 2*8 { + return false + } + first := getFirst(t).Kind() + if first != reflect.Float32 && first != reflect.Float64 { + return false + } + return true +} + +func getFirst(t reflect.Type) reflect.Type { + first := t.Field(0).Type + if first.Kind() == reflect.Struct { + return getFirst(first) + } + return first +} + +func tryPlaceRegister(v reflect.Value, addFloat func(uintptr), addInt func(uintptr)) (ok bool) { + ok = true + var val uint64 + var shift byte // # of bits to shift + var flushed bool + class := _NO_CLASS + flushIfNeeded := func() { + if flushed { + return + } + flushed = true + if class == _SSE { + addFloat(uintptr(val)) + } else { + addInt(uintptr(val)) + } + val = 0 + shift = 0 + class = _NO_CLASS + } + var place func(v reflect.Value) + place = func(v reflect.Value) { + var numFields int + if v.Kind() == reflect.Struct { + numFields = v.Type().NumField() + } else { + numFields = v.Type().Len() + } + + for i := 0; i < numFields; i++ { + flushed = false + var f reflect.Value + if v.Kind() == reflect.Struct { + f = v.Field(i) + } else { + f = v.Index(i) + } + switch f.Kind() { + case reflect.Struct: + place(f) + case reflect.Bool: + if f.Bool() { + val |= 1 + } + shift += 8 + class |= _INTEGER + case reflect.Pointer: + ok = false + return + case reflect.Int8: + val |= uint64(f.Int()&0xFF) << shift + shift += 8 + class |= _INTEGER + case reflect.Int16: + val |= uint64(f.Int()&0xFFFF) << shift + shift += 16 + class |= _INTEGER + case reflect.Int32: + val |= uint64(f.Int()&0xFFFF_FFFF) << shift + shift += 32 + class |= _INTEGER + case reflect.Int64: + val = uint64(f.Int()) + shift = 64 + class = _INTEGER + case reflect.Uint8: + val |= f.Uint() << shift + shift += 8 + class |= _INTEGER + case reflect.Uint16: + val |= f.Uint() << shift + shift += 16 + class |= _INTEGER + case reflect.Uint32: + val |= f.Uint() << shift + shift += 32 + class |= _INTEGER + case reflect.Uint64: + val = f.Uint() + shift = 64 + class = _INTEGER + case reflect.Float32: + val |= uint64(math.Float32bits(float32(f.Float()))) << shift + shift += 32 + class |= _SSE + case reflect.Float64: + if v.Type().Size() > 16 { + ok = false + return + } + val = uint64(math.Float64bits(f.Float())) + shift = 64 + class = _SSE + case reflect.Array: + place(f) + default: + panic("purego: unsupported kind " + f.Kind().String()) + } + + if shift == 64 { + flushIfNeeded() + } else if shift > 64 { + // Should never happen, but may if we forget to reset shift after flush (or forget to flush), + // better fall apart here, than corrupt arguments. + panic("purego: tryPlaceRegisters shift > 64") + } + } + } + + place(v) + flushIfNeeded() + return ok +} + +func placeStack(v reflect.Value, addStack func(uintptr)) { + for i := 0; i < v.Type().NumField(); i++ { + f := v.Field(i) + switch f.Kind() { + case reflect.Pointer: + addStack(f.Pointer()) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + addStack(uintptr(f.Int())) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + addStack(uintptr(f.Uint())) + case reflect.Float32: + addStack(uintptr(math.Float32bits(float32(f.Float())))) + case reflect.Float64: + addStack(uintptr(math.Float64bits(f.Float()))) + case reflect.Struct: + placeStack(f, addStack) + default: + panic("purego: unsupported kind " + f.Kind().String()) + } + } +} diff --git a/vendor/github.com/ebitengine/purego/struct_arm64.go b/vendor/github.com/ebitengine/purego/struct_arm64.go new file mode 100644 index 00000000000..11c36bd6e47 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/struct_arm64.go @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 The Ebitengine Authors + +package purego + +import ( + "math" + "reflect" + "unsafe" +) + +func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { + outSize := outType.Size() + switch { + case outSize == 0: + return reflect.New(outType).Elem() + case outSize <= 8: + r1 := syscall.a1 + if isAllFloats, numFields := isAllSameFloat(outType); isAllFloats { + r1 = syscall.f1 + if numFields == 2 { + r1 = syscall.f2<<32 | syscall.f1 + } + } + return reflect.NewAt(outType, unsafe.Pointer(&struct{ a uintptr }{r1})).Elem() + case outSize <= 16: + r1, r2 := syscall.a1, syscall.a2 + if isAllFloats, numFields := isAllSameFloat(outType); isAllFloats { + switch numFields { + case 4: + r1 = syscall.f2<<32 | syscall.f1 + r2 = syscall.f4<<32 | syscall.f3 + case 3: + r1 = syscall.f2<<32 | syscall.f1 + r2 = syscall.f3 + case 2: + r1 = syscall.f1 + r2 = syscall.f2 + default: + panic("unreachable") + } + } + return reflect.NewAt(outType, unsafe.Pointer(&struct{ a, b uintptr }{r1, r2})).Elem() + default: + if isAllFloats, numFields := isAllSameFloat(outType); isAllFloats && numFields <= 4 { + switch numFields { + case 4: + return reflect.NewAt(outType, unsafe.Pointer(&struct{ a, b, c, d uintptr }{syscall.f1, syscall.f2, syscall.f3, syscall.f4})).Elem() + case 3: + return reflect.NewAt(outType, unsafe.Pointer(&struct{ a, b, c uintptr }{syscall.f1, syscall.f2, syscall.f3})).Elem() + default: + panic("unreachable") + } + } + // create struct from the Go pointer created in arm64_r8 + // weird pointer dereference to circumvent go vet + return reflect.NewAt(outType, *(*unsafe.Pointer)(unsafe.Pointer(&syscall.arm64_r8))).Elem() + } +} + +// https://github.com/ARM-software/abi-aa/blob/main/sysvabi64/sysvabi64.rst +const ( + _NO_CLASS = 0b00 + _FLOAT = 0b01 + _INT = 0b11 +) + +func addStruct(v reflect.Value, numInts, numFloats, numStack *int, addInt, addFloat, addStack func(uintptr), keepAlive []interface{}) []interface{} { + if v.Type().Size() == 0 { + return keepAlive + } + + if hva, hfa, size := isHVA(v.Type()), isHFA(v.Type()), v.Type().Size(); hva || hfa || size <= 16 { + // if this doesn't fit entirely in registers then + // each element goes onto the stack + if hfa && *numFloats+v.NumField() > numOfFloats { + *numFloats = numOfFloats + } else if hva && *numInts+v.NumField() > numOfIntegerRegisters() { + *numInts = numOfIntegerRegisters() + } + + placeRegisters(v, addFloat, addInt) + } else { + keepAlive = placeStack(v, keepAlive, addInt) + } + return keepAlive // the struct was allocated so don't panic +} + +func placeRegisters(v reflect.Value, addFloat func(uintptr), addInt func(uintptr)) { + var val uint64 + var shift byte + var flushed bool + class := _NO_CLASS + var place func(v reflect.Value) + place = func(v reflect.Value) { + var numFields int + if v.Kind() == reflect.Struct { + numFields = v.Type().NumField() + } else { + numFields = v.Type().Len() + } + for k := 0; k < numFields; k++ { + flushed = false + var f reflect.Value + if v.Kind() == reflect.Struct { + f = v.Field(k) + } else { + f = v.Index(k) + } + if shift >= 64 { + shift = 0 + flushed = true + if class == _FLOAT { + addFloat(uintptr(val)) + } else { + addInt(uintptr(val)) + } + } + switch f.Type().Kind() { + case reflect.Struct: + place(f) + case reflect.Bool: + if f.Bool() { + val |= 1 + } + shift += 8 + class |= _INT + case reflect.Uint8: + val |= f.Uint() << shift + shift += 8 + class |= _INT + case reflect.Uint16: + val |= f.Uint() << shift + shift += 16 + class |= _INT + case reflect.Uint32: + val |= f.Uint() << shift + shift += 32 + class |= _INT + case reflect.Uint64: + addInt(uintptr(f.Uint())) + shift = 0 + flushed = true + case reflect.Int8: + val |= uint64(f.Int()&0xFF) << shift + shift += 8 + class |= _INT + case reflect.Int16: + val |= uint64(f.Int()&0xFFFF) << shift + shift += 16 + class |= _INT + case reflect.Int32: + val |= uint64(f.Int()&0xFFFF_FFFF) << shift + shift += 32 + class |= _INT + case reflect.Int64: + addInt(uintptr(f.Int())) + shift = 0 + flushed = true + case reflect.Float32: + if class == _FLOAT { + addFloat(uintptr(val)) + val = 0 + shift = 0 + } + val |= uint64(math.Float32bits(float32(f.Float()))) << shift + shift += 32 + class |= _FLOAT + case reflect.Float64: + addFloat(uintptr(math.Float64bits(float64(f.Float())))) + shift = 0 + flushed = true + case reflect.Array: + place(f) + default: + panic("purego: unsupported kind " + f.Kind().String()) + } + } + } + place(v) + if !flushed { + if class == _FLOAT { + addFloat(uintptr(val)) + } else { + addInt(uintptr(val)) + } + } +} + +func placeStack(v reflect.Value, keepAlive []interface{}, addInt func(uintptr)) []interface{} { + // Struct is too big to be placed in registers. + // Copy to heap and place the pointer in register + ptrStruct := reflect.New(v.Type()) + ptrStruct.Elem().Set(v) + ptr := ptrStruct.Elem().Addr().UnsafePointer() + keepAlive = append(keepAlive, ptr) + addInt(uintptr(ptr)) + return keepAlive +} + +// isHFA reports a Homogeneous Floating-point Aggregate (HFA) which is a Fundamental Data Type that is a +// Floating-Point type and at most four uniquely addressable members (5.9.5.1 in [Arm64 Calling Convention]). +// This type of struct will be placed more compactly than the individual fields. +// +// [Arm64 Calling Convention]: https://github.com/ARM-software/abi-aa/blob/main/sysvabi64/sysvabi64.rst +func isHFA(t reflect.Type) bool { + // round up struct size to nearest 8 see section B.4 + structSize := roundUpTo8(t.Size()) + if structSize == 0 || t.NumField() > 4 { + return false + } + first := t.Field(0) + switch first.Type.Kind() { + case reflect.Float32, reflect.Float64: + firstKind := first.Type.Kind() + for i := 0; i < t.NumField(); i++ { + if t.Field(i).Type.Kind() != firstKind { + return false + } + } + return true + case reflect.Array: + switch first.Type.Elem().Kind() { + case reflect.Float32, reflect.Float64: + return true + default: + return false + } + case reflect.Struct: + for i := 0; i < first.Type.NumField(); i++ { + if !isHFA(first.Type) { + return false + } + } + return true + default: + return false + } +} + +// isHVA reports a Homogeneous Aggregate with a Fundamental Data Type that is a Short-Vector type +// and at most four uniquely addressable members (5.9.5.2 in [Arm64 Calling Convention]). +// A short vector is a machine type that is composed of repeated instances of one fundamental integral or +// floating-point type. It may be 8 or 16 bytes in total size (5.4 in [Arm64 Calling Convention]). +// This type of struct will be placed more compactly than the individual fields. +// +// [Arm64 Calling Convention]: https://github.com/ARM-software/abi-aa/blob/main/sysvabi64/sysvabi64.rst +func isHVA(t reflect.Type) bool { + // round up struct size to nearest 8 see section B.4 + structSize := roundUpTo8(t.Size()) + if structSize == 0 || (structSize != 8 && structSize != 16) { + return false + } + first := t.Field(0) + switch first.Type.Kind() { + case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Int8, reflect.Int16, reflect.Int32: + firstKind := first.Type.Kind() + for i := 0; i < t.NumField(); i++ { + if t.Field(i).Type.Kind() != firstKind { + return false + } + } + return true + case reflect.Array: + switch first.Type.Elem().Kind() { + case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Int8, reflect.Int16, reflect.Int32: + return true + default: + return false + } + default: + return false + } +} diff --git a/vendor/github.com/ebitengine/purego/struct_other.go b/vendor/github.com/ebitengine/purego/struct_other.go new file mode 100644 index 00000000000..9d42adac898 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/struct_other.go @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 The Ebitengine Authors + +//go:build !amd64 && !arm64 + +package purego + +import "reflect" + +func addStruct(v reflect.Value, numInts, numFloats, numStack *int, addInt, addFloat, addStack func(uintptr), keepAlive []interface{}) []interface{} { + panic("purego: struct arguments are not supported") +} + +func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { + panic("purego: struct returns are not supported") +} diff --git a/vendor/github.com/ebitengine/purego/sys_amd64.s b/vendor/github.com/ebitengine/purego/sys_amd64.s new file mode 100644 index 00000000000..cabde1a584e --- /dev/null +++ b/vendor/github.com/ebitengine/purego/sys_amd64.s @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build darwin || freebsd || linux + +#include "textflag.h" +#include "abi_amd64.h" +#include "go_asm.h" +#include "funcdata.h" + +#define STACK_SIZE 80 +#define PTR_ADDRESS (STACK_SIZE - 8) + +// syscall15X calls a function in libc on behalf of the syscall package. +// syscall15X takes a pointer to a struct like: +// struct { +// fn uintptr +// a1 uintptr +// a2 uintptr +// a3 uintptr +// a4 uintptr +// a5 uintptr +// a6 uintptr +// a7 uintptr +// a8 uintptr +// a9 uintptr +// a10 uintptr +// a11 uintptr +// a12 uintptr +// a13 uintptr +// a14 uintptr +// a15 uintptr +// r1 uintptr +// r2 uintptr +// err uintptr +// } +// syscall15X must be called on the g0 stack with the +// C calling convention (use libcCall). +GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 +DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) +TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0 + PUSHQ BP + MOVQ SP, BP + SUBQ $STACK_SIZE, SP + MOVQ DI, PTR_ADDRESS(BP) // save the pointer + MOVQ DI, R11 + + MOVQ syscall15Args_f1(R11), X0 // f1 + MOVQ syscall15Args_f2(R11), X1 // f2 + MOVQ syscall15Args_f3(R11), X2 // f3 + MOVQ syscall15Args_f4(R11), X3 // f4 + MOVQ syscall15Args_f5(R11), X4 // f5 + MOVQ syscall15Args_f6(R11), X5 // f6 + MOVQ syscall15Args_f7(R11), X6 // f7 + MOVQ syscall15Args_f8(R11), X7 // f8 + + MOVQ syscall15Args_a1(R11), DI // a1 + MOVQ syscall15Args_a2(R11), SI // a2 + MOVQ syscall15Args_a3(R11), DX // a3 + MOVQ syscall15Args_a4(R11), CX // a4 + MOVQ syscall15Args_a5(R11), R8 // a5 + MOVQ syscall15Args_a6(R11), R9 // a6 + + // push the remaining paramters onto the stack + MOVQ syscall15Args_a7(R11), R12 + MOVQ R12, 0(SP) // push a7 + MOVQ syscall15Args_a8(R11), R12 + MOVQ R12, 8(SP) // push a8 + MOVQ syscall15Args_a9(R11), R12 + MOVQ R12, 16(SP) // push a9 + MOVQ syscall15Args_a10(R11), R12 + MOVQ R12, 24(SP) // push a10 + MOVQ syscall15Args_a11(R11), R12 + MOVQ R12, 32(SP) // push a11 + MOVQ syscall15Args_a12(R11), R12 + MOVQ R12, 40(SP) // push a12 + MOVQ syscall15Args_a13(R11), R12 + MOVQ R12, 48(SP) // push a13 + MOVQ syscall15Args_a14(R11), R12 + MOVQ R12, 56(SP) // push a14 + MOVQ syscall15Args_a15(R11), R12 + MOVQ R12, 64(SP) // push a15 + XORL AX, AX // vararg: say "no float args" + + MOVQ syscall15Args_fn(R11), R10 // fn + CALL R10 + + MOVQ PTR_ADDRESS(BP), DI // get the pointer back + MOVQ AX, syscall15Args_a1(DI) // r1 + MOVQ DX, syscall15Args_a2(DI) // r3 + MOVQ X0, syscall15Args_f1(DI) // f1 + MOVQ X1, syscall15Args_f2(DI) // f2 + + XORL AX, AX // no error (it's ignored anyway) + ADDQ $STACK_SIZE, SP + MOVQ BP, SP + POPQ BP + RET + +TEXT callbackasm1(SB), NOSPLIT|NOFRAME, $0 + MOVQ 0(SP), AX // save the return address to calculate the cb index + MOVQ 8(SP), R10 // get the return SP so that we can align register args with stack args + ADDQ $8, SP // remove return address from stack, we are not returning to callbackasm, but to its caller. + + // make space for first six int and 8 float arguments below the frame + ADJSP $14*8, SP + MOVSD X0, (1*8)(SP) + MOVSD X1, (2*8)(SP) + MOVSD X2, (3*8)(SP) + MOVSD X3, (4*8)(SP) + MOVSD X4, (5*8)(SP) + MOVSD X5, (6*8)(SP) + MOVSD X6, (7*8)(SP) + MOVSD X7, (8*8)(SP) + MOVQ DI, (9*8)(SP) + MOVQ SI, (10*8)(SP) + MOVQ DX, (11*8)(SP) + MOVQ CX, (12*8)(SP) + MOVQ R8, (13*8)(SP) + MOVQ R9, (14*8)(SP) + LEAQ 8(SP), R8 // R8 = address of args vector + + PUSHQ R10 // push the stack pointer below registers + + // Switch from the host ABI to the Go ABI. + PUSH_REGS_HOST_TO_ABI0() + + // determine index into runtime·cbs table + MOVQ $callbackasm(SB), DX + SUBQ DX, AX + MOVQ $0, DX + MOVQ $5, CX // divide by 5 because each call instruction in ·callbacks is 5 bytes long + DIVL CX + SUBQ $1, AX // subtract 1 because return PC is to the next slot + + // Create a struct callbackArgs on our stack to be passed as + // the "frame" to cgocallback and on to callbackWrap. + // $24 to make enough room for the arguments to runtime.cgocallback + SUBQ $(24+callbackArgs__size), SP + MOVQ AX, (24+callbackArgs_index)(SP) // callback index + MOVQ R8, (24+callbackArgs_args)(SP) // address of args vector + MOVQ $0, (24+callbackArgs_result)(SP) // result + LEAQ 24(SP), AX // take the address of callbackArgs + + // Call cgocallback, which will call callbackWrap(frame). + MOVQ ·callbackWrap_call(SB), DI // Get the ABIInternal function pointer + MOVQ (DI), DI // without by using a closure. + MOVQ AX, SI // frame (address of callbackArgs) + MOVQ $0, CX // context + + CALL crosscall2(SB) // runtime.cgocallback(fn, frame, ctxt uintptr) + + // Get callback result. + MOVQ (24+callbackArgs_result)(SP), AX + ADDQ $(24+callbackArgs__size), SP // remove callbackArgs struct + + POP_REGS_HOST_TO_ABI0() + + POPQ R10 // get the SP back + ADJSP $-14*8, SP // remove arguments + + MOVQ R10, 0(SP) + + RET diff --git a/vendor/github.com/ebitengine/purego/sys_arm64.s b/vendor/github.com/ebitengine/purego/sys_arm64.s new file mode 100644 index 00000000000..a68fdb99ba7 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/sys_arm64.s @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build darwin || freebsd || linux || windows + +#include "textflag.h" +#include "go_asm.h" +#include "funcdata.h" + +#define STACK_SIZE 64 +#define PTR_ADDRESS (STACK_SIZE - 8) + +// syscall15X calls a function in libc on behalf of the syscall package. +// syscall15X takes a pointer to a struct like: +// struct { +// fn uintptr +// a1 uintptr +// a2 uintptr +// a3 uintptr +// a4 uintptr +// a5 uintptr +// a6 uintptr +// a7 uintptr +// a8 uintptr +// a9 uintptr +// a10 uintptr +// a11 uintptr +// a12 uintptr +// a13 uintptr +// a14 uintptr +// a15 uintptr +// r1 uintptr +// r2 uintptr +// err uintptr +// } +// syscall15X must be called on the g0 stack with the +// C calling convention (use libcCall). +GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 +DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) +TEXT syscall15X(SB), NOSPLIT, $0 + SUB $STACK_SIZE, RSP // push structure pointer + MOVD R0, PTR_ADDRESS(RSP) + MOVD R0, R9 + + FMOVD syscall15Args_f1(R9), F0 // f1 + FMOVD syscall15Args_f2(R9), F1 // f2 + FMOVD syscall15Args_f3(R9), F2 // f3 + FMOVD syscall15Args_f4(R9), F3 // f4 + FMOVD syscall15Args_f5(R9), F4 // f5 + FMOVD syscall15Args_f6(R9), F5 // f6 + FMOVD syscall15Args_f7(R9), F6 // f7 + FMOVD syscall15Args_f8(R9), F7 // f8 + + MOVD syscall15Args_a1(R9), R0 // a1 + MOVD syscall15Args_a2(R9), R1 // a2 + MOVD syscall15Args_a3(R9), R2 // a3 + MOVD syscall15Args_a4(R9), R3 // a4 + MOVD syscall15Args_a5(R9), R4 // a5 + MOVD syscall15Args_a6(R9), R5 // a6 + MOVD syscall15Args_a7(R9), R6 // a7 + MOVD syscall15Args_a8(R9), R7 // a8 + MOVD syscall15Args_arm64_r8(R9), R8 // r8 + + MOVD syscall15Args_a9(R9), R10 + MOVD R10, 0(RSP) // push a9 onto stack + MOVD syscall15Args_a10(R9), R10 + MOVD R10, 8(RSP) // push a10 onto stack + MOVD syscall15Args_a11(R9), R10 + MOVD R10, 16(RSP) // push a11 onto stack + MOVD syscall15Args_a12(R9), R10 + MOVD R10, 24(RSP) // push a12 onto stack + MOVD syscall15Args_a13(R9), R10 + MOVD R10, 32(RSP) // push a13 onto stack + MOVD syscall15Args_a14(R9), R10 + MOVD R10, 40(RSP) // push a14 onto stack + MOVD syscall15Args_a15(R9), R10 + MOVD R10, 48(RSP) // push a15 onto stack + + MOVD syscall15Args_fn(R9), R10 // fn + BL (R10) + + MOVD PTR_ADDRESS(RSP), R2 // pop structure pointer + ADD $STACK_SIZE, RSP + + MOVD R0, syscall15Args_a1(R2) // save r1 + MOVD R1, syscall15Args_a2(R2) // save r3 + FMOVD F0, syscall15Args_f1(R2) // save f0 + FMOVD F1, syscall15Args_f2(R2) // save f1 + FMOVD F2, syscall15Args_f3(R2) // save f2 + FMOVD F3, syscall15Args_f4(R2) // save f3 + + RET diff --git a/vendor/github.com/ebitengine/purego/sys_unix_arm64.s b/vendor/github.com/ebitengine/purego/sys_unix_arm64.s new file mode 100644 index 00000000000..6da06b4d188 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/sys_unix_arm64.s @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2023 The Ebitengine Authors + +//go:build darwin || freebsd || linux + +#include "textflag.h" +#include "go_asm.h" +#include "funcdata.h" +#include "abi_arm64.h" + +TEXT callbackasm1(SB), NOSPLIT|NOFRAME, $0 + NO_LOCAL_POINTERS + + // On entry, the trampoline in zcallback_darwin_arm64.s left + // the callback index in R12 (which is volatile in the C ABI). + + // Save callback register arguments R0-R7 and F0-F7. + // We do this at the top of the frame so they're contiguous with stack arguments. + SUB $(16*8), RSP, R14 + FSTPD (F0, F1), (0*8)(R14) + FSTPD (F2, F3), (2*8)(R14) + FSTPD (F4, F5), (4*8)(R14) + FSTPD (F6, F7), (6*8)(R14) + STP (R0, R1), (8*8)(R14) + STP (R2, R3), (10*8)(R14) + STP (R4, R5), (12*8)(R14) + STP (R6, R7), (14*8)(R14) + + // Adjust SP by frame size. + SUB $(26*8), RSP + + // It is important to save R27 because the go assembler + // uses it for move instructions for a variable. + // This line: + // MOVD ·callbackWrap_call(SB), R0 + // Creates the instructions: + // ADRP 14335(PC), R27 + // MOVD 388(27), R0 + // R27 is a callee saved register so we are responsible + // for ensuring its value doesn't change. So save it and + // restore it at the end of this function. + // R30 is the link register. crosscall2 doesn't save it + // so it's saved here. + STP (R27, R30), 0(RSP) + + // Create a struct callbackArgs on our stack. + MOVD $(callbackArgs__size)(RSP), R13 + MOVD R12, callbackArgs_index(R13) // callback index + MOVD R14, callbackArgs_args(R13) // address of args vector + MOVD ZR, callbackArgs_result(R13) // result + + // Move parameters into registers + // Get the ABIInternal function pointer + // without by using a closure. + MOVD ·callbackWrap_call(SB), R0 + MOVD (R0), R0 // fn unsafe.Pointer + MOVD R13, R1 // frame (&callbackArgs{...}) + MOVD $0, R3 // ctxt uintptr + + BL crosscall2(SB) + + // Get callback result. + MOVD $(callbackArgs__size)(RSP), R13 + MOVD callbackArgs_result(R13), R0 + + // Restore LR and R27 + LDP 0(RSP), (R27, R30) + ADD $(26*8), RSP + + RET diff --git a/vendor/github.com/ebitengine/purego/syscall.go b/vendor/github.com/ebitengine/purego/syscall.go new file mode 100644 index 00000000000..c30688dda13 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/syscall.go @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build darwin || freebsd || linux || windows + +package purego + +// CDecl marks a function as being called using the __cdecl calling convention as defined in +// the [MSDocs] when passed to NewCallback. It must be the first argument to the function. +// This is only useful on 386 Windows, but it is safe to use on other platforms. +// +// [MSDocs]: https://learn.microsoft.com/en-us/cpp/cpp/cdecl?view=msvc-170 +type CDecl struct{} + +const ( + maxArgs = 15 + numOfFloats = 8 // arm64 and amd64 both have 8 float registers +) + +type syscall15Args struct { + fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr + f1, f2, f3, f4, f5, f6, f7, f8 uintptr + arm64_r8 uintptr +} + +// SyscallN takes fn, a C function pointer and a list of arguments as uintptr. +// There is an internal maximum number of arguments that SyscallN can take. It panics +// when the maximum is exceeded. It returns the result and the libc error code if there is one. +// +// NOTE: SyscallN does not properly call functions that have both integer and float parameters. +// See discussion comment https://github.com/ebiten/purego/pull/1#issuecomment-1128057607 +// for an explanation of why that is. +// +// On amd64, if there are more than 8 floats the 9th and so on will be placed incorrectly on the +// stack. +// +// The pragma go:nosplit is not needed at this function declaration because it uses go:uintptrescapes +// which forces all the objects that the uintptrs point to onto the heap where a stack split won't affect +// their memory location. +// +//go:uintptrescapes +func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { + if fn == 0 { + panic("purego: fn is nil") + } + if len(args) > maxArgs { + panic("purego: too many arguments to SyscallN") + } + // add padding so there is no out-of-bounds slicing + var tmp [maxArgs]uintptr + copy(tmp[:], args) + return syscall_syscall15X(fn, tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5], tmp[6], tmp[7], tmp[8], tmp[9], tmp[10], tmp[11], tmp[12], tmp[13], tmp[14]) +} diff --git a/vendor/github.com/ebitengine/purego/syscall_cgo_linux.go b/vendor/github.com/ebitengine/purego/syscall_cgo_linux.go new file mode 100644 index 00000000000..36ee14e3b73 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/syscall_cgo_linux.go @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build cgo && !(amd64 || arm64) + +package purego + +import ( + "github.com/ebitengine/purego/internal/cgo" +) + +var syscall15XABI0 = uintptr(cgo.Syscall15XABI0) + +//go:nosplit +func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { + return cgo.Syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15) +} + +func NewCallback(_ interface{}) uintptr { + panic("purego: NewCallback on Linux is only supported on amd64/arm64") +} diff --git a/vendor/github.com/ebitengine/purego/syscall_sysv.go b/vendor/github.com/ebitengine/purego/syscall_sysv.go new file mode 100644 index 00000000000..cce171c8f60 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/syscall_sysv.go @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build darwin || freebsd || (linux && (amd64 || arm64)) + +package purego + +import ( + "reflect" + "runtime" + "sync" + "unsafe" +) + +var syscall15XABI0 uintptr + +//go:nosplit +func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { + args := syscall15Args{ + fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, + a1, a2, a3, a4, a5, a6, a7, a8, + 0, + } + runtime_cgocall(syscall15XABI0, unsafe.Pointer(&args)) + return args.a1, args.a2, 0 +} + +// NewCallback converts a Go function to a function pointer conforming to the C calling convention. +// This is useful when interoperating with C code requiring callbacks. The argument is expected to be a +// function with zero or one uintptr-sized result. The function must not have arguments with size larger than the size +// of uintptr. Only a limited number of callbacks may be created in a single Go process, and any memory allocated +// for these callbacks is never released. At least 2000 callbacks can always be created. Although this function +// provides similar functionality to windows.NewCallback it is distinct. +func NewCallback(fn interface{}) uintptr { + ty := reflect.TypeOf(fn) + for i := 0; i < ty.NumIn(); i++ { + in := ty.In(i) + if !in.AssignableTo(reflect.TypeOf(CDecl{})) { + continue + } + if i != 0 { + panic("purego: CDecl must be the first argument") + } + } + return compileCallback(fn) +} + +// maxCb is the maximum number of callbacks +// only increase this if you have added more to the callbackasm function +const maxCB = 2000 + +var cbs struct { + lock sync.Mutex + numFn int // the number of functions currently in cbs.funcs + funcs [maxCB]reflect.Value // the saved callbacks +} + +type callbackArgs struct { + index uintptr + // args points to the argument block. + // + // The structure of the arguments goes + // float registers followed by the + // integer registers followed by the stack. + // + // This variable is treated as a continuous + // block of memory containing all of the arguments + // for this callback. + args unsafe.Pointer + // Below are out-args from callbackWrap + result uintptr +} + +func compileCallback(fn interface{}) uintptr { + val := reflect.ValueOf(fn) + if val.Kind() != reflect.Func { + panic("purego: the type must be a function but was not") + } + if val.IsNil() { + panic("purego: function must not be nil") + } + ty := val.Type() + for i := 0; i < ty.NumIn(); i++ { + in := ty.In(i) + switch in.Kind() { + case reflect.Struct: + if i == 0 && in.AssignableTo(reflect.TypeOf(CDecl{})) { + continue + } + fallthrough + case reflect.Interface, reflect.Func, reflect.Slice, + reflect.Chan, reflect.Complex64, reflect.Complex128, + reflect.String, reflect.Map, reflect.Invalid: + panic("purego: unsupported argument type: " + in.Kind().String()) + } + } +output: + switch { + case ty.NumOut() == 1: + switch ty.Out(0).Kind() { + case reflect.Pointer, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, + reflect.Bool, reflect.UnsafePointer: + break output + } + panic("purego: unsupported return type: " + ty.String()) + case ty.NumOut() > 1: + panic("purego: callbacks can only have one return") + } + cbs.lock.Lock() + defer cbs.lock.Unlock() + if cbs.numFn >= maxCB { + panic("purego: the maximum number of callbacks has been reached") + } + cbs.funcs[cbs.numFn] = val + cbs.numFn++ + return callbackasmAddr(cbs.numFn - 1) +} + +const ptrSize = unsafe.Sizeof((*int)(nil)) + +const callbackMaxFrame = 64 * ptrSize + +// callbackasm is implemented in zcallback_GOOS_GOARCH.s +// +//go:linkname __callbackasm callbackasm +var __callbackasm byte +var callbackasmABI0 = uintptr(unsafe.Pointer(&__callbackasm)) + +// callbackWrap_call allows the calling of the ABIInternal wrapper +// which is required for runtime.cgocallback without the +// tag which is only allowed in the runtime. +// This closure is used inside sys_darwin_GOARCH.s +var callbackWrap_call = callbackWrap + +// callbackWrap is called by assembly code which determines which Go function to call. +// This function takes the arguments and passes them to the Go function and returns the result. +func callbackWrap(a *callbackArgs) { + cbs.lock.Lock() + fn := cbs.funcs[a.index] + cbs.lock.Unlock() + fnType := fn.Type() + args := make([]reflect.Value, fnType.NumIn()) + frame := (*[callbackMaxFrame]uintptr)(a.args) + var floatsN int // floatsN represents the number of float arguments processed + var intsN int // intsN represents the number of integer arguments processed + // stack points to the index into frame of the current stack element. + // The stack begins after the float and integer registers. + stack := numOfIntegerRegisters() + numOfFloats + for i := range args { + var pos int + switch fnType.In(i).Kind() { + case reflect.Float32, reflect.Float64: + if floatsN >= numOfFloats { + pos = stack + stack++ + } else { + pos = floatsN + } + floatsN++ + case reflect.Struct: + // This is the CDecl field + args[i] = reflect.Zero(fnType.In(i)) + continue + default: + + if intsN >= numOfIntegerRegisters() { + pos = stack + stack++ + } else { + // the integers begin after the floats in frame + pos = intsN + numOfFloats + } + intsN++ + } + args[i] = reflect.NewAt(fnType.In(i), unsafe.Pointer(&frame[pos])).Elem() + } + ret := fn.Call(args) + if len(ret) > 0 { + switch k := ret[0].Kind(); k { + case reflect.Uint, reflect.Uint64, reflect.Uint32, reflect.Uint16, reflect.Uint8, reflect.Uintptr: + a.result = uintptr(ret[0].Uint()) + case reflect.Int, reflect.Int64, reflect.Int32, reflect.Int16, reflect.Int8: + a.result = uintptr(ret[0].Int()) + case reflect.Bool: + if ret[0].Bool() { + a.result = 1 + } else { + a.result = 0 + } + case reflect.Pointer: + a.result = ret[0].Pointer() + case reflect.UnsafePointer: + a.result = ret[0].Pointer() + default: + panic("purego: unsupported kind: " + k.String()) + } + } +} + +// callbackasmAddr returns address of runtime.callbackasm +// function adjusted by i. +// On x86 and amd64, runtime.callbackasm is a series of CALL instructions, +// and we want callback to arrive at +// correspondent call instruction instead of start of +// runtime.callbackasm. +// On ARM, runtime.callbackasm is a series of mov and branch instructions. +// R12 is loaded with the callback index. Each entry is two instructions, +// hence 8 bytes. +func callbackasmAddr(i int) uintptr { + var entrySize int + switch runtime.GOARCH { + default: + panic("purego: unsupported architecture") + case "386", "amd64": + entrySize = 5 + case "arm", "arm64": + // On ARM and ARM64, each entry is a MOV instruction + // followed by a branch instruction + entrySize = 8 + } + return callbackasmABI0 + uintptr(i*entrySize) +} diff --git a/vendor/github.com/ebitengine/purego/syscall_windows.go b/vendor/github.com/ebitengine/purego/syscall_windows.go new file mode 100644 index 00000000000..5fbfcabfdc9 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/syscall_windows.go @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +package purego + +import ( + "reflect" + "syscall" +) + +var syscall15XABI0 uintptr + +func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { + r1, r2, errno := syscall.Syscall15(fn, 15, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15) + return r1, r2, uintptr(errno) +} + +// NewCallback converts a Go function to a function pointer conforming to the stdcall calling convention. +// This is useful when interoperating with Windows code requiring callbacks. The argument is expected to be a +// function with one uintptr-sized result. The function must not have arguments with size larger than the +// size of uintptr. Only a limited number of callbacks may be created in a single Go process, and any memory +// allocated for these callbacks is never released. Between NewCallback and NewCallbackCDecl, at least 1024 +// callbacks can always be created. Although this function is similiar to the darwin version it may act +// differently. +func NewCallback(fn interface{}) uintptr { + isCDecl := false + ty := reflect.TypeOf(fn) + for i := 0; i < ty.NumIn(); i++ { + in := ty.In(i) + if !in.AssignableTo(reflect.TypeOf(CDecl{})) { + continue + } + if i != 0 { + panic("purego: CDecl must be the first argument") + } + isCDecl = true + } + if isCDecl { + return syscall.NewCallbackCDecl(fn) + } + return syscall.NewCallback(fn) +} + +func loadSymbol(handle uintptr, name string) (uintptr, error) { + return syscall.GetProcAddress(syscall.Handle(handle), name) +} diff --git a/vendor/github.com/ebitengine/purego/zcallback_amd64.s b/vendor/github.com/ebitengine/purego/zcallback_amd64.s new file mode 100644 index 00000000000..6a778bfcad1 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/zcallback_amd64.s @@ -0,0 +1,2014 @@ +// Code generated by wincallback.go using 'go generate'. DO NOT EDIT. + +//go:build darwin || freebsd || linux + +// runtime·callbackasm is called by external code to +// execute Go implemented callback function. It is not +// called from the start, instead runtime·compilecallback +// always returns address into runtime·callbackasm offset +// appropriately so different callbacks start with different +// CALL instruction in runtime·callbackasm. This determines +// which Go callback function is executed later on. +#include "textflag.h" + +TEXT callbackasm(SB), NOSPLIT|NOFRAME, $0 + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) + CALL callbackasm1(SB) diff --git a/vendor/github.com/ebitengine/purego/zcallback_arm64.s b/vendor/github.com/ebitengine/purego/zcallback_arm64.s new file mode 100644 index 00000000000..c079b8038e3 --- /dev/null +++ b/vendor/github.com/ebitengine/purego/zcallback_arm64.s @@ -0,0 +1,4014 @@ +// Code generated by wincallback.go using 'go generate'. DO NOT EDIT. + +//go:build darwin || freebsd || linux + +// External code calls into callbackasm at an offset corresponding +// to the callback index. Callbackasm is a table of MOV and B instructions. +// The MOV instruction loads R12 with the callback index, and the +// B instruction branches to callbackasm1. +// callbackasm1 takes the callback index from R12 and +// indexes into an array that stores information about each callback. +// It then calls the Go implementation for that callback. +#include "textflag.h" + +TEXT callbackasm(SB), NOSPLIT|NOFRAME, $0 + MOVD $0, R12 + B callbackasm1(SB) + MOVD $1, R12 + B callbackasm1(SB) + MOVD $2, R12 + B callbackasm1(SB) + MOVD $3, R12 + B callbackasm1(SB) + MOVD $4, R12 + B callbackasm1(SB) + MOVD $5, R12 + B callbackasm1(SB) + MOVD $6, R12 + B callbackasm1(SB) + MOVD $7, R12 + B callbackasm1(SB) + MOVD $8, R12 + B callbackasm1(SB) + MOVD $9, R12 + B callbackasm1(SB) + MOVD $10, R12 + B callbackasm1(SB) + MOVD $11, R12 + B callbackasm1(SB) + MOVD $12, R12 + B callbackasm1(SB) + MOVD $13, R12 + B callbackasm1(SB) + MOVD $14, R12 + B callbackasm1(SB) + MOVD $15, R12 + B callbackasm1(SB) + MOVD $16, R12 + B callbackasm1(SB) + MOVD $17, R12 + B callbackasm1(SB) + MOVD $18, R12 + B callbackasm1(SB) + MOVD $19, R12 + B callbackasm1(SB) + MOVD $20, R12 + B callbackasm1(SB) + MOVD $21, R12 + B callbackasm1(SB) + MOVD $22, R12 + B callbackasm1(SB) + MOVD $23, R12 + B callbackasm1(SB) + MOVD $24, R12 + B callbackasm1(SB) + MOVD $25, R12 + B callbackasm1(SB) + MOVD $26, R12 + B callbackasm1(SB) + MOVD $27, R12 + B callbackasm1(SB) + MOVD $28, R12 + B callbackasm1(SB) + MOVD $29, R12 + B callbackasm1(SB) + MOVD $30, R12 + B callbackasm1(SB) + MOVD $31, R12 + B callbackasm1(SB) + MOVD $32, R12 + B callbackasm1(SB) + MOVD $33, R12 + B callbackasm1(SB) + MOVD $34, R12 + B callbackasm1(SB) + MOVD $35, R12 + B callbackasm1(SB) + MOVD $36, R12 + B callbackasm1(SB) + MOVD $37, R12 + B callbackasm1(SB) + MOVD $38, R12 + B callbackasm1(SB) + MOVD $39, R12 + B callbackasm1(SB) + MOVD $40, R12 + B callbackasm1(SB) + MOVD $41, R12 + B callbackasm1(SB) + MOVD $42, R12 + B callbackasm1(SB) + MOVD $43, R12 + B callbackasm1(SB) + MOVD $44, R12 + B callbackasm1(SB) + MOVD $45, R12 + B callbackasm1(SB) + MOVD $46, R12 + B callbackasm1(SB) + MOVD $47, R12 + B callbackasm1(SB) + MOVD $48, R12 + B callbackasm1(SB) + MOVD $49, R12 + B callbackasm1(SB) + MOVD $50, R12 + B callbackasm1(SB) + MOVD $51, R12 + B callbackasm1(SB) + MOVD $52, R12 + B callbackasm1(SB) + MOVD $53, R12 + B callbackasm1(SB) + MOVD $54, R12 + B callbackasm1(SB) + MOVD $55, R12 + B callbackasm1(SB) + MOVD $56, R12 + B callbackasm1(SB) + MOVD $57, R12 + B callbackasm1(SB) + MOVD $58, R12 + B callbackasm1(SB) + MOVD $59, R12 + B callbackasm1(SB) + MOVD $60, R12 + B callbackasm1(SB) + MOVD $61, R12 + B callbackasm1(SB) + MOVD $62, R12 + B callbackasm1(SB) + MOVD $63, R12 + B callbackasm1(SB) + MOVD $64, R12 + B callbackasm1(SB) + MOVD $65, R12 + B callbackasm1(SB) + MOVD $66, R12 + B callbackasm1(SB) + MOVD $67, R12 + B callbackasm1(SB) + MOVD $68, R12 + B callbackasm1(SB) + MOVD $69, R12 + B callbackasm1(SB) + MOVD $70, R12 + B callbackasm1(SB) + MOVD $71, R12 + B callbackasm1(SB) + MOVD $72, R12 + B callbackasm1(SB) + MOVD $73, R12 + B callbackasm1(SB) + MOVD $74, R12 + B callbackasm1(SB) + MOVD $75, R12 + B callbackasm1(SB) + MOVD $76, R12 + B callbackasm1(SB) + MOVD $77, R12 + B callbackasm1(SB) + MOVD $78, R12 + B callbackasm1(SB) + MOVD $79, R12 + B callbackasm1(SB) + MOVD $80, R12 + B callbackasm1(SB) + MOVD $81, R12 + B callbackasm1(SB) + MOVD $82, R12 + B callbackasm1(SB) + MOVD $83, R12 + B callbackasm1(SB) + MOVD $84, R12 + B callbackasm1(SB) + MOVD $85, R12 + B callbackasm1(SB) + MOVD $86, R12 + B callbackasm1(SB) + MOVD $87, R12 + B callbackasm1(SB) + MOVD $88, R12 + B callbackasm1(SB) + MOVD $89, R12 + B callbackasm1(SB) + MOVD $90, R12 + B callbackasm1(SB) + MOVD $91, R12 + B callbackasm1(SB) + MOVD $92, R12 + B callbackasm1(SB) + MOVD $93, R12 + B callbackasm1(SB) + MOVD $94, R12 + B callbackasm1(SB) + MOVD $95, R12 + B callbackasm1(SB) + MOVD $96, R12 + B callbackasm1(SB) + MOVD $97, R12 + B callbackasm1(SB) + MOVD $98, R12 + B callbackasm1(SB) + MOVD $99, R12 + B callbackasm1(SB) + MOVD $100, R12 + B callbackasm1(SB) + MOVD $101, R12 + B callbackasm1(SB) + MOVD $102, R12 + B callbackasm1(SB) + MOVD $103, R12 + B callbackasm1(SB) + MOVD $104, R12 + B callbackasm1(SB) + MOVD $105, R12 + B callbackasm1(SB) + MOVD $106, R12 + B callbackasm1(SB) + MOVD $107, R12 + B callbackasm1(SB) + MOVD $108, R12 + B callbackasm1(SB) + MOVD $109, R12 + B callbackasm1(SB) + MOVD $110, R12 + B callbackasm1(SB) + MOVD $111, R12 + B callbackasm1(SB) + MOVD $112, R12 + B callbackasm1(SB) + MOVD $113, R12 + B callbackasm1(SB) + MOVD $114, R12 + B callbackasm1(SB) + MOVD $115, R12 + B callbackasm1(SB) + MOVD $116, R12 + B callbackasm1(SB) + MOVD $117, R12 + B callbackasm1(SB) + MOVD $118, R12 + B callbackasm1(SB) + MOVD $119, R12 + B callbackasm1(SB) + MOVD $120, R12 + B callbackasm1(SB) + MOVD $121, R12 + B callbackasm1(SB) + MOVD $122, R12 + B callbackasm1(SB) + MOVD $123, R12 + B callbackasm1(SB) + MOVD $124, R12 + B callbackasm1(SB) + MOVD $125, R12 + B callbackasm1(SB) + MOVD $126, R12 + B callbackasm1(SB) + MOVD $127, R12 + B callbackasm1(SB) + MOVD $128, R12 + B callbackasm1(SB) + MOVD $129, R12 + B callbackasm1(SB) + MOVD $130, R12 + B callbackasm1(SB) + MOVD $131, R12 + B callbackasm1(SB) + MOVD $132, R12 + B callbackasm1(SB) + MOVD $133, R12 + B callbackasm1(SB) + MOVD $134, R12 + B callbackasm1(SB) + MOVD $135, R12 + B callbackasm1(SB) + MOVD $136, R12 + B callbackasm1(SB) + MOVD $137, R12 + B callbackasm1(SB) + MOVD $138, R12 + B callbackasm1(SB) + MOVD $139, R12 + B callbackasm1(SB) + MOVD $140, R12 + B callbackasm1(SB) + MOVD $141, R12 + B callbackasm1(SB) + MOVD $142, R12 + B callbackasm1(SB) + MOVD $143, R12 + B callbackasm1(SB) + MOVD $144, R12 + B callbackasm1(SB) + MOVD $145, R12 + B callbackasm1(SB) + MOVD $146, R12 + B callbackasm1(SB) + MOVD $147, R12 + B callbackasm1(SB) + MOVD $148, R12 + B callbackasm1(SB) + MOVD $149, R12 + B callbackasm1(SB) + MOVD $150, R12 + B callbackasm1(SB) + MOVD $151, R12 + B callbackasm1(SB) + MOVD $152, R12 + B callbackasm1(SB) + MOVD $153, R12 + B callbackasm1(SB) + MOVD $154, R12 + B callbackasm1(SB) + MOVD $155, R12 + B callbackasm1(SB) + MOVD $156, R12 + B callbackasm1(SB) + MOVD $157, R12 + B callbackasm1(SB) + MOVD $158, R12 + B callbackasm1(SB) + MOVD $159, R12 + B callbackasm1(SB) + MOVD $160, R12 + B callbackasm1(SB) + MOVD $161, R12 + B callbackasm1(SB) + MOVD $162, R12 + B callbackasm1(SB) + MOVD $163, R12 + B callbackasm1(SB) + MOVD $164, R12 + B callbackasm1(SB) + MOVD $165, R12 + B callbackasm1(SB) + MOVD $166, R12 + B callbackasm1(SB) + MOVD $167, R12 + B callbackasm1(SB) + MOVD $168, R12 + B callbackasm1(SB) + MOVD $169, R12 + B callbackasm1(SB) + MOVD $170, R12 + B callbackasm1(SB) + MOVD $171, R12 + B callbackasm1(SB) + MOVD $172, R12 + B callbackasm1(SB) + MOVD $173, R12 + B callbackasm1(SB) + MOVD $174, R12 + B callbackasm1(SB) + MOVD $175, R12 + B callbackasm1(SB) + MOVD $176, R12 + B callbackasm1(SB) + MOVD $177, R12 + B callbackasm1(SB) + MOVD $178, R12 + B callbackasm1(SB) + MOVD $179, R12 + B callbackasm1(SB) + MOVD $180, R12 + B callbackasm1(SB) + MOVD $181, R12 + B callbackasm1(SB) + MOVD $182, R12 + B callbackasm1(SB) + MOVD $183, R12 + B callbackasm1(SB) + MOVD $184, R12 + B callbackasm1(SB) + MOVD $185, R12 + B callbackasm1(SB) + MOVD $186, R12 + B callbackasm1(SB) + MOVD $187, R12 + B callbackasm1(SB) + MOVD $188, R12 + B callbackasm1(SB) + MOVD $189, R12 + B callbackasm1(SB) + MOVD $190, R12 + B callbackasm1(SB) + MOVD $191, R12 + B callbackasm1(SB) + MOVD $192, R12 + B callbackasm1(SB) + MOVD $193, R12 + B callbackasm1(SB) + MOVD $194, R12 + B callbackasm1(SB) + MOVD $195, R12 + B callbackasm1(SB) + MOVD $196, R12 + B callbackasm1(SB) + MOVD $197, R12 + B callbackasm1(SB) + MOVD $198, R12 + B callbackasm1(SB) + MOVD $199, R12 + B callbackasm1(SB) + MOVD $200, R12 + B callbackasm1(SB) + MOVD $201, R12 + B callbackasm1(SB) + MOVD $202, R12 + B callbackasm1(SB) + MOVD $203, R12 + B callbackasm1(SB) + MOVD $204, R12 + B callbackasm1(SB) + MOVD $205, R12 + B callbackasm1(SB) + MOVD $206, R12 + B callbackasm1(SB) + MOVD $207, R12 + B callbackasm1(SB) + MOVD $208, R12 + B callbackasm1(SB) + MOVD $209, R12 + B callbackasm1(SB) + MOVD $210, R12 + B callbackasm1(SB) + MOVD $211, R12 + B callbackasm1(SB) + MOVD $212, R12 + B callbackasm1(SB) + MOVD $213, R12 + B callbackasm1(SB) + MOVD $214, R12 + B callbackasm1(SB) + MOVD $215, R12 + B callbackasm1(SB) + MOVD $216, R12 + B callbackasm1(SB) + MOVD $217, R12 + B callbackasm1(SB) + MOVD $218, R12 + B callbackasm1(SB) + MOVD $219, R12 + B callbackasm1(SB) + MOVD $220, R12 + B callbackasm1(SB) + MOVD $221, R12 + B callbackasm1(SB) + MOVD $222, R12 + B callbackasm1(SB) + MOVD $223, R12 + B callbackasm1(SB) + MOVD $224, R12 + B callbackasm1(SB) + MOVD $225, R12 + B callbackasm1(SB) + MOVD $226, R12 + B callbackasm1(SB) + MOVD $227, R12 + B callbackasm1(SB) + MOVD $228, R12 + B callbackasm1(SB) + MOVD $229, R12 + B callbackasm1(SB) + MOVD $230, R12 + B callbackasm1(SB) + MOVD $231, R12 + B callbackasm1(SB) + MOVD $232, R12 + B callbackasm1(SB) + MOVD $233, R12 + B callbackasm1(SB) + MOVD $234, R12 + B callbackasm1(SB) + MOVD $235, R12 + B callbackasm1(SB) + MOVD $236, R12 + B callbackasm1(SB) + MOVD $237, R12 + B callbackasm1(SB) + MOVD $238, R12 + B callbackasm1(SB) + MOVD $239, R12 + B callbackasm1(SB) + MOVD $240, R12 + B callbackasm1(SB) + MOVD $241, R12 + B callbackasm1(SB) + MOVD $242, R12 + B callbackasm1(SB) + MOVD $243, R12 + B callbackasm1(SB) + MOVD $244, R12 + B callbackasm1(SB) + MOVD $245, R12 + B callbackasm1(SB) + MOVD $246, R12 + B callbackasm1(SB) + MOVD $247, R12 + B callbackasm1(SB) + MOVD $248, R12 + B callbackasm1(SB) + MOVD $249, R12 + B callbackasm1(SB) + MOVD $250, R12 + B callbackasm1(SB) + MOVD $251, R12 + B callbackasm1(SB) + MOVD $252, R12 + B callbackasm1(SB) + MOVD $253, R12 + B callbackasm1(SB) + MOVD $254, R12 + B callbackasm1(SB) + MOVD $255, R12 + B callbackasm1(SB) + MOVD $256, R12 + B callbackasm1(SB) + MOVD $257, R12 + B callbackasm1(SB) + MOVD $258, R12 + B callbackasm1(SB) + MOVD $259, R12 + B callbackasm1(SB) + MOVD $260, R12 + B callbackasm1(SB) + MOVD $261, R12 + B callbackasm1(SB) + MOVD $262, R12 + B callbackasm1(SB) + MOVD $263, R12 + B callbackasm1(SB) + MOVD $264, R12 + B callbackasm1(SB) + MOVD $265, R12 + B callbackasm1(SB) + MOVD $266, R12 + B callbackasm1(SB) + MOVD $267, R12 + B callbackasm1(SB) + MOVD $268, R12 + B callbackasm1(SB) + MOVD $269, R12 + B callbackasm1(SB) + MOVD $270, R12 + B callbackasm1(SB) + MOVD $271, R12 + B callbackasm1(SB) + MOVD $272, R12 + B callbackasm1(SB) + MOVD $273, R12 + B callbackasm1(SB) + MOVD $274, R12 + B callbackasm1(SB) + MOVD $275, R12 + B callbackasm1(SB) + MOVD $276, R12 + B callbackasm1(SB) + MOVD $277, R12 + B callbackasm1(SB) + MOVD $278, R12 + B callbackasm1(SB) + MOVD $279, R12 + B callbackasm1(SB) + MOVD $280, R12 + B callbackasm1(SB) + MOVD $281, R12 + B callbackasm1(SB) + MOVD $282, R12 + B callbackasm1(SB) + MOVD $283, R12 + B callbackasm1(SB) + MOVD $284, R12 + B callbackasm1(SB) + MOVD $285, R12 + B callbackasm1(SB) + MOVD $286, R12 + B callbackasm1(SB) + MOVD $287, R12 + B callbackasm1(SB) + MOVD $288, R12 + B callbackasm1(SB) + MOVD $289, R12 + B callbackasm1(SB) + MOVD $290, R12 + B callbackasm1(SB) + MOVD $291, R12 + B callbackasm1(SB) + MOVD $292, R12 + B callbackasm1(SB) + MOVD $293, R12 + B callbackasm1(SB) + MOVD $294, R12 + B callbackasm1(SB) + MOVD $295, R12 + B callbackasm1(SB) + MOVD $296, R12 + B callbackasm1(SB) + MOVD $297, R12 + B callbackasm1(SB) + MOVD $298, R12 + B callbackasm1(SB) + MOVD $299, R12 + B callbackasm1(SB) + MOVD $300, R12 + B callbackasm1(SB) + MOVD $301, R12 + B callbackasm1(SB) + MOVD $302, R12 + B callbackasm1(SB) + MOVD $303, R12 + B callbackasm1(SB) + MOVD $304, R12 + B callbackasm1(SB) + MOVD $305, R12 + B callbackasm1(SB) + MOVD $306, R12 + B callbackasm1(SB) + MOVD $307, R12 + B callbackasm1(SB) + MOVD $308, R12 + B callbackasm1(SB) + MOVD $309, R12 + B callbackasm1(SB) + MOVD $310, R12 + B callbackasm1(SB) + MOVD $311, R12 + B callbackasm1(SB) + MOVD $312, R12 + B callbackasm1(SB) + MOVD $313, R12 + B callbackasm1(SB) + MOVD $314, R12 + B callbackasm1(SB) + MOVD $315, R12 + B callbackasm1(SB) + MOVD $316, R12 + B callbackasm1(SB) + MOVD $317, R12 + B callbackasm1(SB) + MOVD $318, R12 + B callbackasm1(SB) + MOVD $319, R12 + B callbackasm1(SB) + MOVD $320, R12 + B callbackasm1(SB) + MOVD $321, R12 + B callbackasm1(SB) + MOVD $322, R12 + B callbackasm1(SB) + MOVD $323, R12 + B callbackasm1(SB) + MOVD $324, R12 + B callbackasm1(SB) + MOVD $325, R12 + B callbackasm1(SB) + MOVD $326, R12 + B callbackasm1(SB) + MOVD $327, R12 + B callbackasm1(SB) + MOVD $328, R12 + B callbackasm1(SB) + MOVD $329, R12 + B callbackasm1(SB) + MOVD $330, R12 + B callbackasm1(SB) + MOVD $331, R12 + B callbackasm1(SB) + MOVD $332, R12 + B callbackasm1(SB) + MOVD $333, R12 + B callbackasm1(SB) + MOVD $334, R12 + B callbackasm1(SB) + MOVD $335, R12 + B callbackasm1(SB) + MOVD $336, R12 + B callbackasm1(SB) + MOVD $337, R12 + B callbackasm1(SB) + MOVD $338, R12 + B callbackasm1(SB) + MOVD $339, R12 + B callbackasm1(SB) + MOVD $340, R12 + B callbackasm1(SB) + MOVD $341, R12 + B callbackasm1(SB) + MOVD $342, R12 + B callbackasm1(SB) + MOVD $343, R12 + B callbackasm1(SB) + MOVD $344, R12 + B callbackasm1(SB) + MOVD $345, R12 + B callbackasm1(SB) + MOVD $346, R12 + B callbackasm1(SB) + MOVD $347, R12 + B callbackasm1(SB) + MOVD $348, R12 + B callbackasm1(SB) + MOVD $349, R12 + B callbackasm1(SB) + MOVD $350, R12 + B callbackasm1(SB) + MOVD $351, R12 + B callbackasm1(SB) + MOVD $352, R12 + B callbackasm1(SB) + MOVD $353, R12 + B callbackasm1(SB) + MOVD $354, R12 + B callbackasm1(SB) + MOVD $355, R12 + B callbackasm1(SB) + MOVD $356, R12 + B callbackasm1(SB) + MOVD $357, R12 + B callbackasm1(SB) + MOVD $358, R12 + B callbackasm1(SB) + MOVD $359, R12 + B callbackasm1(SB) + MOVD $360, R12 + B callbackasm1(SB) + MOVD $361, R12 + B callbackasm1(SB) + MOVD $362, R12 + B callbackasm1(SB) + MOVD $363, R12 + B callbackasm1(SB) + MOVD $364, R12 + B callbackasm1(SB) + MOVD $365, R12 + B callbackasm1(SB) + MOVD $366, R12 + B callbackasm1(SB) + MOVD $367, R12 + B callbackasm1(SB) + MOVD $368, R12 + B callbackasm1(SB) + MOVD $369, R12 + B callbackasm1(SB) + MOVD $370, R12 + B callbackasm1(SB) + MOVD $371, R12 + B callbackasm1(SB) + MOVD $372, R12 + B callbackasm1(SB) + MOVD $373, R12 + B callbackasm1(SB) + MOVD $374, R12 + B callbackasm1(SB) + MOVD $375, R12 + B callbackasm1(SB) + MOVD $376, R12 + B callbackasm1(SB) + MOVD $377, R12 + B callbackasm1(SB) + MOVD $378, R12 + B callbackasm1(SB) + MOVD $379, R12 + B callbackasm1(SB) + MOVD $380, R12 + B callbackasm1(SB) + MOVD $381, R12 + B callbackasm1(SB) + MOVD $382, R12 + B callbackasm1(SB) + MOVD $383, R12 + B callbackasm1(SB) + MOVD $384, R12 + B callbackasm1(SB) + MOVD $385, R12 + B callbackasm1(SB) + MOVD $386, R12 + B callbackasm1(SB) + MOVD $387, R12 + B callbackasm1(SB) + MOVD $388, R12 + B callbackasm1(SB) + MOVD $389, R12 + B callbackasm1(SB) + MOVD $390, R12 + B callbackasm1(SB) + MOVD $391, R12 + B callbackasm1(SB) + MOVD $392, R12 + B callbackasm1(SB) + MOVD $393, R12 + B callbackasm1(SB) + MOVD $394, R12 + B callbackasm1(SB) + MOVD $395, R12 + B callbackasm1(SB) + MOVD $396, R12 + B callbackasm1(SB) + MOVD $397, R12 + B callbackasm1(SB) + MOVD $398, R12 + B callbackasm1(SB) + MOVD $399, R12 + B callbackasm1(SB) + MOVD $400, R12 + B callbackasm1(SB) + MOVD $401, R12 + B callbackasm1(SB) + MOVD $402, R12 + B callbackasm1(SB) + MOVD $403, R12 + B callbackasm1(SB) + MOVD $404, R12 + B callbackasm1(SB) + MOVD $405, R12 + B callbackasm1(SB) + MOVD $406, R12 + B callbackasm1(SB) + MOVD $407, R12 + B callbackasm1(SB) + MOVD $408, R12 + B callbackasm1(SB) + MOVD $409, R12 + B callbackasm1(SB) + MOVD $410, R12 + B callbackasm1(SB) + MOVD $411, R12 + B callbackasm1(SB) + MOVD $412, R12 + B callbackasm1(SB) + MOVD $413, R12 + B callbackasm1(SB) + MOVD $414, R12 + B callbackasm1(SB) + MOVD $415, R12 + B callbackasm1(SB) + MOVD $416, R12 + B callbackasm1(SB) + MOVD $417, R12 + B callbackasm1(SB) + MOVD $418, R12 + B callbackasm1(SB) + MOVD $419, R12 + B callbackasm1(SB) + MOVD $420, R12 + B callbackasm1(SB) + MOVD $421, R12 + B callbackasm1(SB) + MOVD $422, R12 + B callbackasm1(SB) + MOVD $423, R12 + B callbackasm1(SB) + MOVD $424, R12 + B callbackasm1(SB) + MOVD $425, R12 + B callbackasm1(SB) + MOVD $426, R12 + B callbackasm1(SB) + MOVD $427, R12 + B callbackasm1(SB) + MOVD $428, R12 + B callbackasm1(SB) + MOVD $429, R12 + B callbackasm1(SB) + MOVD $430, R12 + B callbackasm1(SB) + MOVD $431, R12 + B callbackasm1(SB) + MOVD $432, R12 + B callbackasm1(SB) + MOVD $433, R12 + B callbackasm1(SB) + MOVD $434, R12 + B callbackasm1(SB) + MOVD $435, R12 + B callbackasm1(SB) + MOVD $436, R12 + B callbackasm1(SB) + MOVD $437, R12 + B callbackasm1(SB) + MOVD $438, R12 + B callbackasm1(SB) + MOVD $439, R12 + B callbackasm1(SB) + MOVD $440, R12 + B callbackasm1(SB) + MOVD $441, R12 + B callbackasm1(SB) + MOVD $442, R12 + B callbackasm1(SB) + MOVD $443, R12 + B callbackasm1(SB) + MOVD $444, R12 + B callbackasm1(SB) + MOVD $445, R12 + B callbackasm1(SB) + MOVD $446, R12 + B callbackasm1(SB) + MOVD $447, R12 + B callbackasm1(SB) + MOVD $448, R12 + B callbackasm1(SB) + MOVD $449, R12 + B callbackasm1(SB) + MOVD $450, R12 + B callbackasm1(SB) + MOVD $451, R12 + B callbackasm1(SB) + MOVD $452, R12 + B callbackasm1(SB) + MOVD $453, R12 + B callbackasm1(SB) + MOVD $454, R12 + B callbackasm1(SB) + MOVD $455, R12 + B callbackasm1(SB) + MOVD $456, R12 + B callbackasm1(SB) + MOVD $457, R12 + B callbackasm1(SB) + MOVD $458, R12 + B callbackasm1(SB) + MOVD $459, R12 + B callbackasm1(SB) + MOVD $460, R12 + B callbackasm1(SB) + MOVD $461, R12 + B callbackasm1(SB) + MOVD $462, R12 + B callbackasm1(SB) + MOVD $463, R12 + B callbackasm1(SB) + MOVD $464, R12 + B callbackasm1(SB) + MOVD $465, R12 + B callbackasm1(SB) + MOVD $466, R12 + B callbackasm1(SB) + MOVD $467, R12 + B callbackasm1(SB) + MOVD $468, R12 + B callbackasm1(SB) + MOVD $469, R12 + B callbackasm1(SB) + MOVD $470, R12 + B callbackasm1(SB) + MOVD $471, R12 + B callbackasm1(SB) + MOVD $472, R12 + B callbackasm1(SB) + MOVD $473, R12 + B callbackasm1(SB) + MOVD $474, R12 + B callbackasm1(SB) + MOVD $475, R12 + B callbackasm1(SB) + MOVD $476, R12 + B callbackasm1(SB) + MOVD $477, R12 + B callbackasm1(SB) + MOVD $478, R12 + B callbackasm1(SB) + MOVD $479, R12 + B callbackasm1(SB) + MOVD $480, R12 + B callbackasm1(SB) + MOVD $481, R12 + B callbackasm1(SB) + MOVD $482, R12 + B callbackasm1(SB) + MOVD $483, R12 + B callbackasm1(SB) + MOVD $484, R12 + B callbackasm1(SB) + MOVD $485, R12 + B callbackasm1(SB) + MOVD $486, R12 + B callbackasm1(SB) + MOVD $487, R12 + B callbackasm1(SB) + MOVD $488, R12 + B callbackasm1(SB) + MOVD $489, R12 + B callbackasm1(SB) + MOVD $490, R12 + B callbackasm1(SB) + MOVD $491, R12 + B callbackasm1(SB) + MOVD $492, R12 + B callbackasm1(SB) + MOVD $493, R12 + B callbackasm1(SB) + MOVD $494, R12 + B callbackasm1(SB) + MOVD $495, R12 + B callbackasm1(SB) + MOVD $496, R12 + B callbackasm1(SB) + MOVD $497, R12 + B callbackasm1(SB) + MOVD $498, R12 + B callbackasm1(SB) + MOVD $499, R12 + B callbackasm1(SB) + MOVD $500, R12 + B callbackasm1(SB) + MOVD $501, R12 + B callbackasm1(SB) + MOVD $502, R12 + B callbackasm1(SB) + MOVD $503, R12 + B callbackasm1(SB) + MOVD $504, R12 + B callbackasm1(SB) + MOVD $505, R12 + B callbackasm1(SB) + MOVD $506, R12 + B callbackasm1(SB) + MOVD $507, R12 + B callbackasm1(SB) + MOVD $508, R12 + B callbackasm1(SB) + MOVD $509, R12 + B callbackasm1(SB) + MOVD $510, R12 + B callbackasm1(SB) + MOVD $511, R12 + B callbackasm1(SB) + MOVD $512, R12 + B callbackasm1(SB) + MOVD $513, R12 + B callbackasm1(SB) + MOVD $514, R12 + B callbackasm1(SB) + MOVD $515, R12 + B callbackasm1(SB) + MOVD $516, R12 + B callbackasm1(SB) + MOVD $517, R12 + B callbackasm1(SB) + MOVD $518, R12 + B callbackasm1(SB) + MOVD $519, R12 + B callbackasm1(SB) + MOVD $520, R12 + B callbackasm1(SB) + MOVD $521, R12 + B callbackasm1(SB) + MOVD $522, R12 + B callbackasm1(SB) + MOVD $523, R12 + B callbackasm1(SB) + MOVD $524, R12 + B callbackasm1(SB) + MOVD $525, R12 + B callbackasm1(SB) + MOVD $526, R12 + B callbackasm1(SB) + MOVD $527, R12 + B callbackasm1(SB) + MOVD $528, R12 + B callbackasm1(SB) + MOVD $529, R12 + B callbackasm1(SB) + MOVD $530, R12 + B callbackasm1(SB) + MOVD $531, R12 + B callbackasm1(SB) + MOVD $532, R12 + B callbackasm1(SB) + MOVD $533, R12 + B callbackasm1(SB) + MOVD $534, R12 + B callbackasm1(SB) + MOVD $535, R12 + B callbackasm1(SB) + MOVD $536, R12 + B callbackasm1(SB) + MOVD $537, R12 + B callbackasm1(SB) + MOVD $538, R12 + B callbackasm1(SB) + MOVD $539, R12 + B callbackasm1(SB) + MOVD $540, R12 + B callbackasm1(SB) + MOVD $541, R12 + B callbackasm1(SB) + MOVD $542, R12 + B callbackasm1(SB) + MOVD $543, R12 + B callbackasm1(SB) + MOVD $544, R12 + B callbackasm1(SB) + MOVD $545, R12 + B callbackasm1(SB) + MOVD $546, R12 + B callbackasm1(SB) + MOVD $547, R12 + B callbackasm1(SB) + MOVD $548, R12 + B callbackasm1(SB) + MOVD $549, R12 + B callbackasm1(SB) + MOVD $550, R12 + B callbackasm1(SB) + MOVD $551, R12 + B callbackasm1(SB) + MOVD $552, R12 + B callbackasm1(SB) + MOVD $553, R12 + B callbackasm1(SB) + MOVD $554, R12 + B callbackasm1(SB) + MOVD $555, R12 + B callbackasm1(SB) + MOVD $556, R12 + B callbackasm1(SB) + MOVD $557, R12 + B callbackasm1(SB) + MOVD $558, R12 + B callbackasm1(SB) + MOVD $559, R12 + B callbackasm1(SB) + MOVD $560, R12 + B callbackasm1(SB) + MOVD $561, R12 + B callbackasm1(SB) + MOVD $562, R12 + B callbackasm1(SB) + MOVD $563, R12 + B callbackasm1(SB) + MOVD $564, R12 + B callbackasm1(SB) + MOVD $565, R12 + B callbackasm1(SB) + MOVD $566, R12 + B callbackasm1(SB) + MOVD $567, R12 + B callbackasm1(SB) + MOVD $568, R12 + B callbackasm1(SB) + MOVD $569, R12 + B callbackasm1(SB) + MOVD $570, R12 + B callbackasm1(SB) + MOVD $571, R12 + B callbackasm1(SB) + MOVD $572, R12 + B callbackasm1(SB) + MOVD $573, R12 + B callbackasm1(SB) + MOVD $574, R12 + B callbackasm1(SB) + MOVD $575, R12 + B callbackasm1(SB) + MOVD $576, R12 + B callbackasm1(SB) + MOVD $577, R12 + B callbackasm1(SB) + MOVD $578, R12 + B callbackasm1(SB) + MOVD $579, R12 + B callbackasm1(SB) + MOVD $580, R12 + B callbackasm1(SB) + MOVD $581, R12 + B callbackasm1(SB) + MOVD $582, R12 + B callbackasm1(SB) + MOVD $583, R12 + B callbackasm1(SB) + MOVD $584, R12 + B callbackasm1(SB) + MOVD $585, R12 + B callbackasm1(SB) + MOVD $586, R12 + B callbackasm1(SB) + MOVD $587, R12 + B callbackasm1(SB) + MOVD $588, R12 + B callbackasm1(SB) + MOVD $589, R12 + B callbackasm1(SB) + MOVD $590, R12 + B callbackasm1(SB) + MOVD $591, R12 + B callbackasm1(SB) + MOVD $592, R12 + B callbackasm1(SB) + MOVD $593, R12 + B callbackasm1(SB) + MOVD $594, R12 + B callbackasm1(SB) + MOVD $595, R12 + B callbackasm1(SB) + MOVD $596, R12 + B callbackasm1(SB) + MOVD $597, R12 + B callbackasm1(SB) + MOVD $598, R12 + B callbackasm1(SB) + MOVD $599, R12 + B callbackasm1(SB) + MOVD $600, R12 + B callbackasm1(SB) + MOVD $601, R12 + B callbackasm1(SB) + MOVD $602, R12 + B callbackasm1(SB) + MOVD $603, R12 + B callbackasm1(SB) + MOVD $604, R12 + B callbackasm1(SB) + MOVD $605, R12 + B callbackasm1(SB) + MOVD $606, R12 + B callbackasm1(SB) + MOVD $607, R12 + B callbackasm1(SB) + MOVD $608, R12 + B callbackasm1(SB) + MOVD $609, R12 + B callbackasm1(SB) + MOVD $610, R12 + B callbackasm1(SB) + MOVD $611, R12 + B callbackasm1(SB) + MOVD $612, R12 + B callbackasm1(SB) + MOVD $613, R12 + B callbackasm1(SB) + MOVD $614, R12 + B callbackasm1(SB) + MOVD $615, R12 + B callbackasm1(SB) + MOVD $616, R12 + B callbackasm1(SB) + MOVD $617, R12 + B callbackasm1(SB) + MOVD $618, R12 + B callbackasm1(SB) + MOVD $619, R12 + B callbackasm1(SB) + MOVD $620, R12 + B callbackasm1(SB) + MOVD $621, R12 + B callbackasm1(SB) + MOVD $622, R12 + B callbackasm1(SB) + MOVD $623, R12 + B callbackasm1(SB) + MOVD $624, R12 + B callbackasm1(SB) + MOVD $625, R12 + B callbackasm1(SB) + MOVD $626, R12 + B callbackasm1(SB) + MOVD $627, R12 + B callbackasm1(SB) + MOVD $628, R12 + B callbackasm1(SB) + MOVD $629, R12 + B callbackasm1(SB) + MOVD $630, R12 + B callbackasm1(SB) + MOVD $631, R12 + B callbackasm1(SB) + MOVD $632, R12 + B callbackasm1(SB) + MOVD $633, R12 + B callbackasm1(SB) + MOVD $634, R12 + B callbackasm1(SB) + MOVD $635, R12 + B callbackasm1(SB) + MOVD $636, R12 + B callbackasm1(SB) + MOVD $637, R12 + B callbackasm1(SB) + MOVD $638, R12 + B callbackasm1(SB) + MOVD $639, R12 + B callbackasm1(SB) + MOVD $640, R12 + B callbackasm1(SB) + MOVD $641, R12 + B callbackasm1(SB) + MOVD $642, R12 + B callbackasm1(SB) + MOVD $643, R12 + B callbackasm1(SB) + MOVD $644, R12 + B callbackasm1(SB) + MOVD $645, R12 + B callbackasm1(SB) + MOVD $646, R12 + B callbackasm1(SB) + MOVD $647, R12 + B callbackasm1(SB) + MOVD $648, R12 + B callbackasm1(SB) + MOVD $649, R12 + B callbackasm1(SB) + MOVD $650, R12 + B callbackasm1(SB) + MOVD $651, R12 + B callbackasm1(SB) + MOVD $652, R12 + B callbackasm1(SB) + MOVD $653, R12 + B callbackasm1(SB) + MOVD $654, R12 + B callbackasm1(SB) + MOVD $655, R12 + B callbackasm1(SB) + MOVD $656, R12 + B callbackasm1(SB) + MOVD $657, R12 + B callbackasm1(SB) + MOVD $658, R12 + B callbackasm1(SB) + MOVD $659, R12 + B callbackasm1(SB) + MOVD $660, R12 + B callbackasm1(SB) + MOVD $661, R12 + B callbackasm1(SB) + MOVD $662, R12 + B callbackasm1(SB) + MOVD $663, R12 + B callbackasm1(SB) + MOVD $664, R12 + B callbackasm1(SB) + MOVD $665, R12 + B callbackasm1(SB) + MOVD $666, R12 + B callbackasm1(SB) + MOVD $667, R12 + B callbackasm1(SB) + MOVD $668, R12 + B callbackasm1(SB) + MOVD $669, R12 + B callbackasm1(SB) + MOVD $670, R12 + B callbackasm1(SB) + MOVD $671, R12 + B callbackasm1(SB) + MOVD $672, R12 + B callbackasm1(SB) + MOVD $673, R12 + B callbackasm1(SB) + MOVD $674, R12 + B callbackasm1(SB) + MOVD $675, R12 + B callbackasm1(SB) + MOVD $676, R12 + B callbackasm1(SB) + MOVD $677, R12 + B callbackasm1(SB) + MOVD $678, R12 + B callbackasm1(SB) + MOVD $679, R12 + B callbackasm1(SB) + MOVD $680, R12 + B callbackasm1(SB) + MOVD $681, R12 + B callbackasm1(SB) + MOVD $682, R12 + B callbackasm1(SB) + MOVD $683, R12 + B callbackasm1(SB) + MOVD $684, R12 + B callbackasm1(SB) + MOVD $685, R12 + B callbackasm1(SB) + MOVD $686, R12 + B callbackasm1(SB) + MOVD $687, R12 + B callbackasm1(SB) + MOVD $688, R12 + B callbackasm1(SB) + MOVD $689, R12 + B callbackasm1(SB) + MOVD $690, R12 + B callbackasm1(SB) + MOVD $691, R12 + B callbackasm1(SB) + MOVD $692, R12 + B callbackasm1(SB) + MOVD $693, R12 + B callbackasm1(SB) + MOVD $694, R12 + B callbackasm1(SB) + MOVD $695, R12 + B callbackasm1(SB) + MOVD $696, R12 + B callbackasm1(SB) + MOVD $697, R12 + B callbackasm1(SB) + MOVD $698, R12 + B callbackasm1(SB) + MOVD $699, R12 + B callbackasm1(SB) + MOVD $700, R12 + B callbackasm1(SB) + MOVD $701, R12 + B callbackasm1(SB) + MOVD $702, R12 + B callbackasm1(SB) + MOVD $703, R12 + B callbackasm1(SB) + MOVD $704, R12 + B callbackasm1(SB) + MOVD $705, R12 + B callbackasm1(SB) + MOVD $706, R12 + B callbackasm1(SB) + MOVD $707, R12 + B callbackasm1(SB) + MOVD $708, R12 + B callbackasm1(SB) + MOVD $709, R12 + B callbackasm1(SB) + MOVD $710, R12 + B callbackasm1(SB) + MOVD $711, R12 + B callbackasm1(SB) + MOVD $712, R12 + B callbackasm1(SB) + MOVD $713, R12 + B callbackasm1(SB) + MOVD $714, R12 + B callbackasm1(SB) + MOVD $715, R12 + B callbackasm1(SB) + MOVD $716, R12 + B callbackasm1(SB) + MOVD $717, R12 + B callbackasm1(SB) + MOVD $718, R12 + B callbackasm1(SB) + MOVD $719, R12 + B callbackasm1(SB) + MOVD $720, R12 + B callbackasm1(SB) + MOVD $721, R12 + B callbackasm1(SB) + MOVD $722, R12 + B callbackasm1(SB) + MOVD $723, R12 + B callbackasm1(SB) + MOVD $724, R12 + B callbackasm1(SB) + MOVD $725, R12 + B callbackasm1(SB) + MOVD $726, R12 + B callbackasm1(SB) + MOVD $727, R12 + B callbackasm1(SB) + MOVD $728, R12 + B callbackasm1(SB) + MOVD $729, R12 + B callbackasm1(SB) + MOVD $730, R12 + B callbackasm1(SB) + MOVD $731, R12 + B callbackasm1(SB) + MOVD $732, R12 + B callbackasm1(SB) + MOVD $733, R12 + B callbackasm1(SB) + MOVD $734, R12 + B callbackasm1(SB) + MOVD $735, R12 + B callbackasm1(SB) + MOVD $736, R12 + B callbackasm1(SB) + MOVD $737, R12 + B callbackasm1(SB) + MOVD $738, R12 + B callbackasm1(SB) + MOVD $739, R12 + B callbackasm1(SB) + MOVD $740, R12 + B callbackasm1(SB) + MOVD $741, R12 + B callbackasm1(SB) + MOVD $742, R12 + B callbackasm1(SB) + MOVD $743, R12 + B callbackasm1(SB) + MOVD $744, R12 + B callbackasm1(SB) + MOVD $745, R12 + B callbackasm1(SB) + MOVD $746, R12 + B callbackasm1(SB) + MOVD $747, R12 + B callbackasm1(SB) + MOVD $748, R12 + B callbackasm1(SB) + MOVD $749, R12 + B callbackasm1(SB) + MOVD $750, R12 + B callbackasm1(SB) + MOVD $751, R12 + B callbackasm1(SB) + MOVD $752, R12 + B callbackasm1(SB) + MOVD $753, R12 + B callbackasm1(SB) + MOVD $754, R12 + B callbackasm1(SB) + MOVD $755, R12 + B callbackasm1(SB) + MOVD $756, R12 + B callbackasm1(SB) + MOVD $757, R12 + B callbackasm1(SB) + MOVD $758, R12 + B callbackasm1(SB) + MOVD $759, R12 + B callbackasm1(SB) + MOVD $760, R12 + B callbackasm1(SB) + MOVD $761, R12 + B callbackasm1(SB) + MOVD $762, R12 + B callbackasm1(SB) + MOVD $763, R12 + B callbackasm1(SB) + MOVD $764, R12 + B callbackasm1(SB) + MOVD $765, R12 + B callbackasm1(SB) + MOVD $766, R12 + B callbackasm1(SB) + MOVD $767, R12 + B callbackasm1(SB) + MOVD $768, R12 + B callbackasm1(SB) + MOVD $769, R12 + B callbackasm1(SB) + MOVD $770, R12 + B callbackasm1(SB) + MOVD $771, R12 + B callbackasm1(SB) + MOVD $772, R12 + B callbackasm1(SB) + MOVD $773, R12 + B callbackasm1(SB) + MOVD $774, R12 + B callbackasm1(SB) + MOVD $775, R12 + B callbackasm1(SB) + MOVD $776, R12 + B callbackasm1(SB) + MOVD $777, R12 + B callbackasm1(SB) + MOVD $778, R12 + B callbackasm1(SB) + MOVD $779, R12 + B callbackasm1(SB) + MOVD $780, R12 + B callbackasm1(SB) + MOVD $781, R12 + B callbackasm1(SB) + MOVD $782, R12 + B callbackasm1(SB) + MOVD $783, R12 + B callbackasm1(SB) + MOVD $784, R12 + B callbackasm1(SB) + MOVD $785, R12 + B callbackasm1(SB) + MOVD $786, R12 + B callbackasm1(SB) + MOVD $787, R12 + B callbackasm1(SB) + MOVD $788, R12 + B callbackasm1(SB) + MOVD $789, R12 + B callbackasm1(SB) + MOVD $790, R12 + B callbackasm1(SB) + MOVD $791, R12 + B callbackasm1(SB) + MOVD $792, R12 + B callbackasm1(SB) + MOVD $793, R12 + B callbackasm1(SB) + MOVD $794, R12 + B callbackasm1(SB) + MOVD $795, R12 + B callbackasm1(SB) + MOVD $796, R12 + B callbackasm1(SB) + MOVD $797, R12 + B callbackasm1(SB) + MOVD $798, R12 + B callbackasm1(SB) + MOVD $799, R12 + B callbackasm1(SB) + MOVD $800, R12 + B callbackasm1(SB) + MOVD $801, R12 + B callbackasm1(SB) + MOVD $802, R12 + B callbackasm1(SB) + MOVD $803, R12 + B callbackasm1(SB) + MOVD $804, R12 + B callbackasm1(SB) + MOVD $805, R12 + B callbackasm1(SB) + MOVD $806, R12 + B callbackasm1(SB) + MOVD $807, R12 + B callbackasm1(SB) + MOVD $808, R12 + B callbackasm1(SB) + MOVD $809, R12 + B callbackasm1(SB) + MOVD $810, R12 + B callbackasm1(SB) + MOVD $811, R12 + B callbackasm1(SB) + MOVD $812, R12 + B callbackasm1(SB) + MOVD $813, R12 + B callbackasm1(SB) + MOVD $814, R12 + B callbackasm1(SB) + MOVD $815, R12 + B callbackasm1(SB) + MOVD $816, R12 + B callbackasm1(SB) + MOVD $817, R12 + B callbackasm1(SB) + MOVD $818, R12 + B callbackasm1(SB) + MOVD $819, R12 + B callbackasm1(SB) + MOVD $820, R12 + B callbackasm1(SB) + MOVD $821, R12 + B callbackasm1(SB) + MOVD $822, R12 + B callbackasm1(SB) + MOVD $823, R12 + B callbackasm1(SB) + MOVD $824, R12 + B callbackasm1(SB) + MOVD $825, R12 + B callbackasm1(SB) + MOVD $826, R12 + B callbackasm1(SB) + MOVD $827, R12 + B callbackasm1(SB) + MOVD $828, R12 + B callbackasm1(SB) + MOVD $829, R12 + B callbackasm1(SB) + MOVD $830, R12 + B callbackasm1(SB) + MOVD $831, R12 + B callbackasm1(SB) + MOVD $832, R12 + B callbackasm1(SB) + MOVD $833, R12 + B callbackasm1(SB) + MOVD $834, R12 + B callbackasm1(SB) + MOVD $835, R12 + B callbackasm1(SB) + MOVD $836, R12 + B callbackasm1(SB) + MOVD $837, R12 + B callbackasm1(SB) + MOVD $838, R12 + B callbackasm1(SB) + MOVD $839, R12 + B callbackasm1(SB) + MOVD $840, R12 + B callbackasm1(SB) + MOVD $841, R12 + B callbackasm1(SB) + MOVD $842, R12 + B callbackasm1(SB) + MOVD $843, R12 + B callbackasm1(SB) + MOVD $844, R12 + B callbackasm1(SB) + MOVD $845, R12 + B callbackasm1(SB) + MOVD $846, R12 + B callbackasm1(SB) + MOVD $847, R12 + B callbackasm1(SB) + MOVD $848, R12 + B callbackasm1(SB) + MOVD $849, R12 + B callbackasm1(SB) + MOVD $850, R12 + B callbackasm1(SB) + MOVD $851, R12 + B callbackasm1(SB) + MOVD $852, R12 + B callbackasm1(SB) + MOVD $853, R12 + B callbackasm1(SB) + MOVD $854, R12 + B callbackasm1(SB) + MOVD $855, R12 + B callbackasm1(SB) + MOVD $856, R12 + B callbackasm1(SB) + MOVD $857, R12 + B callbackasm1(SB) + MOVD $858, R12 + B callbackasm1(SB) + MOVD $859, R12 + B callbackasm1(SB) + MOVD $860, R12 + B callbackasm1(SB) + MOVD $861, R12 + B callbackasm1(SB) + MOVD $862, R12 + B callbackasm1(SB) + MOVD $863, R12 + B callbackasm1(SB) + MOVD $864, R12 + B callbackasm1(SB) + MOVD $865, R12 + B callbackasm1(SB) + MOVD $866, R12 + B callbackasm1(SB) + MOVD $867, R12 + B callbackasm1(SB) + MOVD $868, R12 + B callbackasm1(SB) + MOVD $869, R12 + B callbackasm1(SB) + MOVD $870, R12 + B callbackasm1(SB) + MOVD $871, R12 + B callbackasm1(SB) + MOVD $872, R12 + B callbackasm1(SB) + MOVD $873, R12 + B callbackasm1(SB) + MOVD $874, R12 + B callbackasm1(SB) + MOVD $875, R12 + B callbackasm1(SB) + MOVD $876, R12 + B callbackasm1(SB) + MOVD $877, R12 + B callbackasm1(SB) + MOVD $878, R12 + B callbackasm1(SB) + MOVD $879, R12 + B callbackasm1(SB) + MOVD $880, R12 + B callbackasm1(SB) + MOVD $881, R12 + B callbackasm1(SB) + MOVD $882, R12 + B callbackasm1(SB) + MOVD $883, R12 + B callbackasm1(SB) + MOVD $884, R12 + B callbackasm1(SB) + MOVD $885, R12 + B callbackasm1(SB) + MOVD $886, R12 + B callbackasm1(SB) + MOVD $887, R12 + B callbackasm1(SB) + MOVD $888, R12 + B callbackasm1(SB) + MOVD $889, R12 + B callbackasm1(SB) + MOVD $890, R12 + B callbackasm1(SB) + MOVD $891, R12 + B callbackasm1(SB) + MOVD $892, R12 + B callbackasm1(SB) + MOVD $893, R12 + B callbackasm1(SB) + MOVD $894, R12 + B callbackasm1(SB) + MOVD $895, R12 + B callbackasm1(SB) + MOVD $896, R12 + B callbackasm1(SB) + MOVD $897, R12 + B callbackasm1(SB) + MOVD $898, R12 + B callbackasm1(SB) + MOVD $899, R12 + B callbackasm1(SB) + MOVD $900, R12 + B callbackasm1(SB) + MOVD $901, R12 + B callbackasm1(SB) + MOVD $902, R12 + B callbackasm1(SB) + MOVD $903, R12 + B callbackasm1(SB) + MOVD $904, R12 + B callbackasm1(SB) + MOVD $905, R12 + B callbackasm1(SB) + MOVD $906, R12 + B callbackasm1(SB) + MOVD $907, R12 + B callbackasm1(SB) + MOVD $908, R12 + B callbackasm1(SB) + MOVD $909, R12 + B callbackasm1(SB) + MOVD $910, R12 + B callbackasm1(SB) + MOVD $911, R12 + B callbackasm1(SB) + MOVD $912, R12 + B callbackasm1(SB) + MOVD $913, R12 + B callbackasm1(SB) + MOVD $914, R12 + B callbackasm1(SB) + MOVD $915, R12 + B callbackasm1(SB) + MOVD $916, R12 + B callbackasm1(SB) + MOVD $917, R12 + B callbackasm1(SB) + MOVD $918, R12 + B callbackasm1(SB) + MOVD $919, R12 + B callbackasm1(SB) + MOVD $920, R12 + B callbackasm1(SB) + MOVD $921, R12 + B callbackasm1(SB) + MOVD $922, R12 + B callbackasm1(SB) + MOVD $923, R12 + B callbackasm1(SB) + MOVD $924, R12 + B callbackasm1(SB) + MOVD $925, R12 + B callbackasm1(SB) + MOVD $926, R12 + B callbackasm1(SB) + MOVD $927, R12 + B callbackasm1(SB) + MOVD $928, R12 + B callbackasm1(SB) + MOVD $929, R12 + B callbackasm1(SB) + MOVD $930, R12 + B callbackasm1(SB) + MOVD $931, R12 + B callbackasm1(SB) + MOVD $932, R12 + B callbackasm1(SB) + MOVD $933, R12 + B callbackasm1(SB) + MOVD $934, R12 + B callbackasm1(SB) + MOVD $935, R12 + B callbackasm1(SB) + MOVD $936, R12 + B callbackasm1(SB) + MOVD $937, R12 + B callbackasm1(SB) + MOVD $938, R12 + B callbackasm1(SB) + MOVD $939, R12 + B callbackasm1(SB) + MOVD $940, R12 + B callbackasm1(SB) + MOVD $941, R12 + B callbackasm1(SB) + MOVD $942, R12 + B callbackasm1(SB) + MOVD $943, R12 + B callbackasm1(SB) + MOVD $944, R12 + B callbackasm1(SB) + MOVD $945, R12 + B callbackasm1(SB) + MOVD $946, R12 + B callbackasm1(SB) + MOVD $947, R12 + B callbackasm1(SB) + MOVD $948, R12 + B callbackasm1(SB) + MOVD $949, R12 + B callbackasm1(SB) + MOVD $950, R12 + B callbackasm1(SB) + MOVD $951, R12 + B callbackasm1(SB) + MOVD $952, R12 + B callbackasm1(SB) + MOVD $953, R12 + B callbackasm1(SB) + MOVD $954, R12 + B callbackasm1(SB) + MOVD $955, R12 + B callbackasm1(SB) + MOVD $956, R12 + B callbackasm1(SB) + MOVD $957, R12 + B callbackasm1(SB) + MOVD $958, R12 + B callbackasm1(SB) + MOVD $959, R12 + B callbackasm1(SB) + MOVD $960, R12 + B callbackasm1(SB) + MOVD $961, R12 + B callbackasm1(SB) + MOVD $962, R12 + B callbackasm1(SB) + MOVD $963, R12 + B callbackasm1(SB) + MOVD $964, R12 + B callbackasm1(SB) + MOVD $965, R12 + B callbackasm1(SB) + MOVD $966, R12 + B callbackasm1(SB) + MOVD $967, R12 + B callbackasm1(SB) + MOVD $968, R12 + B callbackasm1(SB) + MOVD $969, R12 + B callbackasm1(SB) + MOVD $970, R12 + B callbackasm1(SB) + MOVD $971, R12 + B callbackasm1(SB) + MOVD $972, R12 + B callbackasm1(SB) + MOVD $973, R12 + B callbackasm1(SB) + MOVD $974, R12 + B callbackasm1(SB) + MOVD $975, R12 + B callbackasm1(SB) + MOVD $976, R12 + B callbackasm1(SB) + MOVD $977, R12 + B callbackasm1(SB) + MOVD $978, R12 + B callbackasm1(SB) + MOVD $979, R12 + B callbackasm1(SB) + MOVD $980, R12 + B callbackasm1(SB) + MOVD $981, R12 + B callbackasm1(SB) + MOVD $982, R12 + B callbackasm1(SB) + MOVD $983, R12 + B callbackasm1(SB) + MOVD $984, R12 + B callbackasm1(SB) + MOVD $985, R12 + B callbackasm1(SB) + MOVD $986, R12 + B callbackasm1(SB) + MOVD $987, R12 + B callbackasm1(SB) + MOVD $988, R12 + B callbackasm1(SB) + MOVD $989, R12 + B callbackasm1(SB) + MOVD $990, R12 + B callbackasm1(SB) + MOVD $991, R12 + B callbackasm1(SB) + MOVD $992, R12 + B callbackasm1(SB) + MOVD $993, R12 + B callbackasm1(SB) + MOVD $994, R12 + B callbackasm1(SB) + MOVD $995, R12 + B callbackasm1(SB) + MOVD $996, R12 + B callbackasm1(SB) + MOVD $997, R12 + B callbackasm1(SB) + MOVD $998, R12 + B callbackasm1(SB) + MOVD $999, R12 + B callbackasm1(SB) + MOVD $1000, R12 + B callbackasm1(SB) + MOVD $1001, R12 + B callbackasm1(SB) + MOVD $1002, R12 + B callbackasm1(SB) + MOVD $1003, R12 + B callbackasm1(SB) + MOVD $1004, R12 + B callbackasm1(SB) + MOVD $1005, R12 + B callbackasm1(SB) + MOVD $1006, R12 + B callbackasm1(SB) + MOVD $1007, R12 + B callbackasm1(SB) + MOVD $1008, R12 + B callbackasm1(SB) + MOVD $1009, R12 + B callbackasm1(SB) + MOVD $1010, R12 + B callbackasm1(SB) + MOVD $1011, R12 + B callbackasm1(SB) + MOVD $1012, R12 + B callbackasm1(SB) + MOVD $1013, R12 + B callbackasm1(SB) + MOVD $1014, R12 + B callbackasm1(SB) + MOVD $1015, R12 + B callbackasm1(SB) + MOVD $1016, R12 + B callbackasm1(SB) + MOVD $1017, R12 + B callbackasm1(SB) + MOVD $1018, R12 + B callbackasm1(SB) + MOVD $1019, R12 + B callbackasm1(SB) + MOVD $1020, R12 + B callbackasm1(SB) + MOVD $1021, R12 + B callbackasm1(SB) + MOVD $1022, R12 + B callbackasm1(SB) + MOVD $1023, R12 + B callbackasm1(SB) + MOVD $1024, R12 + B callbackasm1(SB) + MOVD $1025, R12 + B callbackasm1(SB) + MOVD $1026, R12 + B callbackasm1(SB) + MOVD $1027, R12 + B callbackasm1(SB) + MOVD $1028, R12 + B callbackasm1(SB) + MOVD $1029, R12 + B callbackasm1(SB) + MOVD $1030, R12 + B callbackasm1(SB) + MOVD $1031, R12 + B callbackasm1(SB) + MOVD $1032, R12 + B callbackasm1(SB) + MOVD $1033, R12 + B callbackasm1(SB) + MOVD $1034, R12 + B callbackasm1(SB) + MOVD $1035, R12 + B callbackasm1(SB) + MOVD $1036, R12 + B callbackasm1(SB) + MOVD $1037, R12 + B callbackasm1(SB) + MOVD $1038, R12 + B callbackasm1(SB) + MOVD $1039, R12 + B callbackasm1(SB) + MOVD $1040, R12 + B callbackasm1(SB) + MOVD $1041, R12 + B callbackasm1(SB) + MOVD $1042, R12 + B callbackasm1(SB) + MOVD $1043, R12 + B callbackasm1(SB) + MOVD $1044, R12 + B callbackasm1(SB) + MOVD $1045, R12 + B callbackasm1(SB) + MOVD $1046, R12 + B callbackasm1(SB) + MOVD $1047, R12 + B callbackasm1(SB) + MOVD $1048, R12 + B callbackasm1(SB) + MOVD $1049, R12 + B callbackasm1(SB) + MOVD $1050, R12 + B callbackasm1(SB) + MOVD $1051, R12 + B callbackasm1(SB) + MOVD $1052, R12 + B callbackasm1(SB) + MOVD $1053, R12 + B callbackasm1(SB) + MOVD $1054, R12 + B callbackasm1(SB) + MOVD $1055, R12 + B callbackasm1(SB) + MOVD $1056, R12 + B callbackasm1(SB) + MOVD $1057, R12 + B callbackasm1(SB) + MOVD $1058, R12 + B callbackasm1(SB) + MOVD $1059, R12 + B callbackasm1(SB) + MOVD $1060, R12 + B callbackasm1(SB) + MOVD $1061, R12 + B callbackasm1(SB) + MOVD $1062, R12 + B callbackasm1(SB) + MOVD $1063, R12 + B callbackasm1(SB) + MOVD $1064, R12 + B callbackasm1(SB) + MOVD $1065, R12 + B callbackasm1(SB) + MOVD $1066, R12 + B callbackasm1(SB) + MOVD $1067, R12 + B callbackasm1(SB) + MOVD $1068, R12 + B callbackasm1(SB) + MOVD $1069, R12 + B callbackasm1(SB) + MOVD $1070, R12 + B callbackasm1(SB) + MOVD $1071, R12 + B callbackasm1(SB) + MOVD $1072, R12 + B callbackasm1(SB) + MOVD $1073, R12 + B callbackasm1(SB) + MOVD $1074, R12 + B callbackasm1(SB) + MOVD $1075, R12 + B callbackasm1(SB) + MOVD $1076, R12 + B callbackasm1(SB) + MOVD $1077, R12 + B callbackasm1(SB) + MOVD $1078, R12 + B callbackasm1(SB) + MOVD $1079, R12 + B callbackasm1(SB) + MOVD $1080, R12 + B callbackasm1(SB) + MOVD $1081, R12 + B callbackasm1(SB) + MOVD $1082, R12 + B callbackasm1(SB) + MOVD $1083, R12 + B callbackasm1(SB) + MOVD $1084, R12 + B callbackasm1(SB) + MOVD $1085, R12 + B callbackasm1(SB) + MOVD $1086, R12 + B callbackasm1(SB) + MOVD $1087, R12 + B callbackasm1(SB) + MOVD $1088, R12 + B callbackasm1(SB) + MOVD $1089, R12 + B callbackasm1(SB) + MOVD $1090, R12 + B callbackasm1(SB) + MOVD $1091, R12 + B callbackasm1(SB) + MOVD $1092, R12 + B callbackasm1(SB) + MOVD $1093, R12 + B callbackasm1(SB) + MOVD $1094, R12 + B callbackasm1(SB) + MOVD $1095, R12 + B callbackasm1(SB) + MOVD $1096, R12 + B callbackasm1(SB) + MOVD $1097, R12 + B callbackasm1(SB) + MOVD $1098, R12 + B callbackasm1(SB) + MOVD $1099, R12 + B callbackasm1(SB) + MOVD $1100, R12 + B callbackasm1(SB) + MOVD $1101, R12 + B callbackasm1(SB) + MOVD $1102, R12 + B callbackasm1(SB) + MOVD $1103, R12 + B callbackasm1(SB) + MOVD $1104, R12 + B callbackasm1(SB) + MOVD $1105, R12 + B callbackasm1(SB) + MOVD $1106, R12 + B callbackasm1(SB) + MOVD $1107, R12 + B callbackasm1(SB) + MOVD $1108, R12 + B callbackasm1(SB) + MOVD $1109, R12 + B callbackasm1(SB) + MOVD $1110, R12 + B callbackasm1(SB) + MOVD $1111, R12 + B callbackasm1(SB) + MOVD $1112, R12 + B callbackasm1(SB) + MOVD $1113, R12 + B callbackasm1(SB) + MOVD $1114, R12 + B callbackasm1(SB) + MOVD $1115, R12 + B callbackasm1(SB) + MOVD $1116, R12 + B callbackasm1(SB) + MOVD $1117, R12 + B callbackasm1(SB) + MOVD $1118, R12 + B callbackasm1(SB) + MOVD $1119, R12 + B callbackasm1(SB) + MOVD $1120, R12 + B callbackasm1(SB) + MOVD $1121, R12 + B callbackasm1(SB) + MOVD $1122, R12 + B callbackasm1(SB) + MOVD $1123, R12 + B callbackasm1(SB) + MOVD $1124, R12 + B callbackasm1(SB) + MOVD $1125, R12 + B callbackasm1(SB) + MOVD $1126, R12 + B callbackasm1(SB) + MOVD $1127, R12 + B callbackasm1(SB) + MOVD $1128, R12 + B callbackasm1(SB) + MOVD $1129, R12 + B callbackasm1(SB) + MOVD $1130, R12 + B callbackasm1(SB) + MOVD $1131, R12 + B callbackasm1(SB) + MOVD $1132, R12 + B callbackasm1(SB) + MOVD $1133, R12 + B callbackasm1(SB) + MOVD $1134, R12 + B callbackasm1(SB) + MOVD $1135, R12 + B callbackasm1(SB) + MOVD $1136, R12 + B callbackasm1(SB) + MOVD $1137, R12 + B callbackasm1(SB) + MOVD $1138, R12 + B callbackasm1(SB) + MOVD $1139, R12 + B callbackasm1(SB) + MOVD $1140, R12 + B callbackasm1(SB) + MOVD $1141, R12 + B callbackasm1(SB) + MOVD $1142, R12 + B callbackasm1(SB) + MOVD $1143, R12 + B callbackasm1(SB) + MOVD $1144, R12 + B callbackasm1(SB) + MOVD $1145, R12 + B callbackasm1(SB) + MOVD $1146, R12 + B callbackasm1(SB) + MOVD $1147, R12 + B callbackasm1(SB) + MOVD $1148, R12 + B callbackasm1(SB) + MOVD $1149, R12 + B callbackasm1(SB) + MOVD $1150, R12 + B callbackasm1(SB) + MOVD $1151, R12 + B callbackasm1(SB) + MOVD $1152, R12 + B callbackasm1(SB) + MOVD $1153, R12 + B callbackasm1(SB) + MOVD $1154, R12 + B callbackasm1(SB) + MOVD $1155, R12 + B callbackasm1(SB) + MOVD $1156, R12 + B callbackasm1(SB) + MOVD $1157, R12 + B callbackasm1(SB) + MOVD $1158, R12 + B callbackasm1(SB) + MOVD $1159, R12 + B callbackasm1(SB) + MOVD $1160, R12 + B callbackasm1(SB) + MOVD $1161, R12 + B callbackasm1(SB) + MOVD $1162, R12 + B callbackasm1(SB) + MOVD $1163, R12 + B callbackasm1(SB) + MOVD $1164, R12 + B callbackasm1(SB) + MOVD $1165, R12 + B callbackasm1(SB) + MOVD $1166, R12 + B callbackasm1(SB) + MOVD $1167, R12 + B callbackasm1(SB) + MOVD $1168, R12 + B callbackasm1(SB) + MOVD $1169, R12 + B callbackasm1(SB) + MOVD $1170, R12 + B callbackasm1(SB) + MOVD $1171, R12 + B callbackasm1(SB) + MOVD $1172, R12 + B callbackasm1(SB) + MOVD $1173, R12 + B callbackasm1(SB) + MOVD $1174, R12 + B callbackasm1(SB) + MOVD $1175, R12 + B callbackasm1(SB) + MOVD $1176, R12 + B callbackasm1(SB) + MOVD $1177, R12 + B callbackasm1(SB) + MOVD $1178, R12 + B callbackasm1(SB) + MOVD $1179, R12 + B callbackasm1(SB) + MOVD $1180, R12 + B callbackasm1(SB) + MOVD $1181, R12 + B callbackasm1(SB) + MOVD $1182, R12 + B callbackasm1(SB) + MOVD $1183, R12 + B callbackasm1(SB) + MOVD $1184, R12 + B callbackasm1(SB) + MOVD $1185, R12 + B callbackasm1(SB) + MOVD $1186, R12 + B callbackasm1(SB) + MOVD $1187, R12 + B callbackasm1(SB) + MOVD $1188, R12 + B callbackasm1(SB) + MOVD $1189, R12 + B callbackasm1(SB) + MOVD $1190, R12 + B callbackasm1(SB) + MOVD $1191, R12 + B callbackasm1(SB) + MOVD $1192, R12 + B callbackasm1(SB) + MOVD $1193, R12 + B callbackasm1(SB) + MOVD $1194, R12 + B callbackasm1(SB) + MOVD $1195, R12 + B callbackasm1(SB) + MOVD $1196, R12 + B callbackasm1(SB) + MOVD $1197, R12 + B callbackasm1(SB) + MOVD $1198, R12 + B callbackasm1(SB) + MOVD $1199, R12 + B callbackasm1(SB) + MOVD $1200, R12 + B callbackasm1(SB) + MOVD $1201, R12 + B callbackasm1(SB) + MOVD $1202, R12 + B callbackasm1(SB) + MOVD $1203, R12 + B callbackasm1(SB) + MOVD $1204, R12 + B callbackasm1(SB) + MOVD $1205, R12 + B callbackasm1(SB) + MOVD $1206, R12 + B callbackasm1(SB) + MOVD $1207, R12 + B callbackasm1(SB) + MOVD $1208, R12 + B callbackasm1(SB) + MOVD $1209, R12 + B callbackasm1(SB) + MOVD $1210, R12 + B callbackasm1(SB) + MOVD $1211, R12 + B callbackasm1(SB) + MOVD $1212, R12 + B callbackasm1(SB) + MOVD $1213, R12 + B callbackasm1(SB) + MOVD $1214, R12 + B callbackasm1(SB) + MOVD $1215, R12 + B callbackasm1(SB) + MOVD $1216, R12 + B callbackasm1(SB) + MOVD $1217, R12 + B callbackasm1(SB) + MOVD $1218, R12 + B callbackasm1(SB) + MOVD $1219, R12 + B callbackasm1(SB) + MOVD $1220, R12 + B callbackasm1(SB) + MOVD $1221, R12 + B callbackasm1(SB) + MOVD $1222, R12 + B callbackasm1(SB) + MOVD $1223, R12 + B callbackasm1(SB) + MOVD $1224, R12 + B callbackasm1(SB) + MOVD $1225, R12 + B callbackasm1(SB) + MOVD $1226, R12 + B callbackasm1(SB) + MOVD $1227, R12 + B callbackasm1(SB) + MOVD $1228, R12 + B callbackasm1(SB) + MOVD $1229, R12 + B callbackasm1(SB) + MOVD $1230, R12 + B callbackasm1(SB) + MOVD $1231, R12 + B callbackasm1(SB) + MOVD $1232, R12 + B callbackasm1(SB) + MOVD $1233, R12 + B callbackasm1(SB) + MOVD $1234, R12 + B callbackasm1(SB) + MOVD $1235, R12 + B callbackasm1(SB) + MOVD $1236, R12 + B callbackasm1(SB) + MOVD $1237, R12 + B callbackasm1(SB) + MOVD $1238, R12 + B callbackasm1(SB) + MOVD $1239, R12 + B callbackasm1(SB) + MOVD $1240, R12 + B callbackasm1(SB) + MOVD $1241, R12 + B callbackasm1(SB) + MOVD $1242, R12 + B callbackasm1(SB) + MOVD $1243, R12 + B callbackasm1(SB) + MOVD $1244, R12 + B callbackasm1(SB) + MOVD $1245, R12 + B callbackasm1(SB) + MOVD $1246, R12 + B callbackasm1(SB) + MOVD $1247, R12 + B callbackasm1(SB) + MOVD $1248, R12 + B callbackasm1(SB) + MOVD $1249, R12 + B callbackasm1(SB) + MOVD $1250, R12 + B callbackasm1(SB) + MOVD $1251, R12 + B callbackasm1(SB) + MOVD $1252, R12 + B callbackasm1(SB) + MOVD $1253, R12 + B callbackasm1(SB) + MOVD $1254, R12 + B callbackasm1(SB) + MOVD $1255, R12 + B callbackasm1(SB) + MOVD $1256, R12 + B callbackasm1(SB) + MOVD $1257, R12 + B callbackasm1(SB) + MOVD $1258, R12 + B callbackasm1(SB) + MOVD $1259, R12 + B callbackasm1(SB) + MOVD $1260, R12 + B callbackasm1(SB) + MOVD $1261, R12 + B callbackasm1(SB) + MOVD $1262, R12 + B callbackasm1(SB) + MOVD $1263, R12 + B callbackasm1(SB) + MOVD $1264, R12 + B callbackasm1(SB) + MOVD $1265, R12 + B callbackasm1(SB) + MOVD $1266, R12 + B callbackasm1(SB) + MOVD $1267, R12 + B callbackasm1(SB) + MOVD $1268, R12 + B callbackasm1(SB) + MOVD $1269, R12 + B callbackasm1(SB) + MOVD $1270, R12 + B callbackasm1(SB) + MOVD $1271, R12 + B callbackasm1(SB) + MOVD $1272, R12 + B callbackasm1(SB) + MOVD $1273, R12 + B callbackasm1(SB) + MOVD $1274, R12 + B callbackasm1(SB) + MOVD $1275, R12 + B callbackasm1(SB) + MOVD $1276, R12 + B callbackasm1(SB) + MOVD $1277, R12 + B callbackasm1(SB) + MOVD $1278, R12 + B callbackasm1(SB) + MOVD $1279, R12 + B callbackasm1(SB) + MOVD $1280, R12 + B callbackasm1(SB) + MOVD $1281, R12 + B callbackasm1(SB) + MOVD $1282, R12 + B callbackasm1(SB) + MOVD $1283, R12 + B callbackasm1(SB) + MOVD $1284, R12 + B callbackasm1(SB) + MOVD $1285, R12 + B callbackasm1(SB) + MOVD $1286, R12 + B callbackasm1(SB) + MOVD $1287, R12 + B callbackasm1(SB) + MOVD $1288, R12 + B callbackasm1(SB) + MOVD $1289, R12 + B callbackasm1(SB) + MOVD $1290, R12 + B callbackasm1(SB) + MOVD $1291, R12 + B callbackasm1(SB) + MOVD $1292, R12 + B callbackasm1(SB) + MOVD $1293, R12 + B callbackasm1(SB) + MOVD $1294, R12 + B callbackasm1(SB) + MOVD $1295, R12 + B callbackasm1(SB) + MOVD $1296, R12 + B callbackasm1(SB) + MOVD $1297, R12 + B callbackasm1(SB) + MOVD $1298, R12 + B callbackasm1(SB) + MOVD $1299, R12 + B callbackasm1(SB) + MOVD $1300, R12 + B callbackasm1(SB) + MOVD $1301, R12 + B callbackasm1(SB) + MOVD $1302, R12 + B callbackasm1(SB) + MOVD $1303, R12 + B callbackasm1(SB) + MOVD $1304, R12 + B callbackasm1(SB) + MOVD $1305, R12 + B callbackasm1(SB) + MOVD $1306, R12 + B callbackasm1(SB) + MOVD $1307, R12 + B callbackasm1(SB) + MOVD $1308, R12 + B callbackasm1(SB) + MOVD $1309, R12 + B callbackasm1(SB) + MOVD $1310, R12 + B callbackasm1(SB) + MOVD $1311, R12 + B callbackasm1(SB) + MOVD $1312, R12 + B callbackasm1(SB) + MOVD $1313, R12 + B callbackasm1(SB) + MOVD $1314, R12 + B callbackasm1(SB) + MOVD $1315, R12 + B callbackasm1(SB) + MOVD $1316, R12 + B callbackasm1(SB) + MOVD $1317, R12 + B callbackasm1(SB) + MOVD $1318, R12 + B callbackasm1(SB) + MOVD $1319, R12 + B callbackasm1(SB) + MOVD $1320, R12 + B callbackasm1(SB) + MOVD $1321, R12 + B callbackasm1(SB) + MOVD $1322, R12 + B callbackasm1(SB) + MOVD $1323, R12 + B callbackasm1(SB) + MOVD $1324, R12 + B callbackasm1(SB) + MOVD $1325, R12 + B callbackasm1(SB) + MOVD $1326, R12 + B callbackasm1(SB) + MOVD $1327, R12 + B callbackasm1(SB) + MOVD $1328, R12 + B callbackasm1(SB) + MOVD $1329, R12 + B callbackasm1(SB) + MOVD $1330, R12 + B callbackasm1(SB) + MOVD $1331, R12 + B callbackasm1(SB) + MOVD $1332, R12 + B callbackasm1(SB) + MOVD $1333, R12 + B callbackasm1(SB) + MOVD $1334, R12 + B callbackasm1(SB) + MOVD $1335, R12 + B callbackasm1(SB) + MOVD $1336, R12 + B callbackasm1(SB) + MOVD $1337, R12 + B callbackasm1(SB) + MOVD $1338, R12 + B callbackasm1(SB) + MOVD $1339, R12 + B callbackasm1(SB) + MOVD $1340, R12 + B callbackasm1(SB) + MOVD $1341, R12 + B callbackasm1(SB) + MOVD $1342, R12 + B callbackasm1(SB) + MOVD $1343, R12 + B callbackasm1(SB) + MOVD $1344, R12 + B callbackasm1(SB) + MOVD $1345, R12 + B callbackasm1(SB) + MOVD $1346, R12 + B callbackasm1(SB) + MOVD $1347, R12 + B callbackasm1(SB) + MOVD $1348, R12 + B callbackasm1(SB) + MOVD $1349, R12 + B callbackasm1(SB) + MOVD $1350, R12 + B callbackasm1(SB) + MOVD $1351, R12 + B callbackasm1(SB) + MOVD $1352, R12 + B callbackasm1(SB) + MOVD $1353, R12 + B callbackasm1(SB) + MOVD $1354, R12 + B callbackasm1(SB) + MOVD $1355, R12 + B callbackasm1(SB) + MOVD $1356, R12 + B callbackasm1(SB) + MOVD $1357, R12 + B callbackasm1(SB) + MOVD $1358, R12 + B callbackasm1(SB) + MOVD $1359, R12 + B callbackasm1(SB) + MOVD $1360, R12 + B callbackasm1(SB) + MOVD $1361, R12 + B callbackasm1(SB) + MOVD $1362, R12 + B callbackasm1(SB) + MOVD $1363, R12 + B callbackasm1(SB) + MOVD $1364, R12 + B callbackasm1(SB) + MOVD $1365, R12 + B callbackasm1(SB) + MOVD $1366, R12 + B callbackasm1(SB) + MOVD $1367, R12 + B callbackasm1(SB) + MOVD $1368, R12 + B callbackasm1(SB) + MOVD $1369, R12 + B callbackasm1(SB) + MOVD $1370, R12 + B callbackasm1(SB) + MOVD $1371, R12 + B callbackasm1(SB) + MOVD $1372, R12 + B callbackasm1(SB) + MOVD $1373, R12 + B callbackasm1(SB) + MOVD $1374, R12 + B callbackasm1(SB) + MOVD $1375, R12 + B callbackasm1(SB) + MOVD $1376, R12 + B callbackasm1(SB) + MOVD $1377, R12 + B callbackasm1(SB) + MOVD $1378, R12 + B callbackasm1(SB) + MOVD $1379, R12 + B callbackasm1(SB) + MOVD $1380, R12 + B callbackasm1(SB) + MOVD $1381, R12 + B callbackasm1(SB) + MOVD $1382, R12 + B callbackasm1(SB) + MOVD $1383, R12 + B callbackasm1(SB) + MOVD $1384, R12 + B callbackasm1(SB) + MOVD $1385, R12 + B callbackasm1(SB) + MOVD $1386, R12 + B callbackasm1(SB) + MOVD $1387, R12 + B callbackasm1(SB) + MOVD $1388, R12 + B callbackasm1(SB) + MOVD $1389, R12 + B callbackasm1(SB) + MOVD $1390, R12 + B callbackasm1(SB) + MOVD $1391, R12 + B callbackasm1(SB) + MOVD $1392, R12 + B callbackasm1(SB) + MOVD $1393, R12 + B callbackasm1(SB) + MOVD $1394, R12 + B callbackasm1(SB) + MOVD $1395, R12 + B callbackasm1(SB) + MOVD $1396, R12 + B callbackasm1(SB) + MOVD $1397, R12 + B callbackasm1(SB) + MOVD $1398, R12 + B callbackasm1(SB) + MOVD $1399, R12 + B callbackasm1(SB) + MOVD $1400, R12 + B callbackasm1(SB) + MOVD $1401, R12 + B callbackasm1(SB) + MOVD $1402, R12 + B callbackasm1(SB) + MOVD $1403, R12 + B callbackasm1(SB) + MOVD $1404, R12 + B callbackasm1(SB) + MOVD $1405, R12 + B callbackasm1(SB) + MOVD $1406, R12 + B callbackasm1(SB) + MOVD $1407, R12 + B callbackasm1(SB) + MOVD $1408, R12 + B callbackasm1(SB) + MOVD $1409, R12 + B callbackasm1(SB) + MOVD $1410, R12 + B callbackasm1(SB) + MOVD $1411, R12 + B callbackasm1(SB) + MOVD $1412, R12 + B callbackasm1(SB) + MOVD $1413, R12 + B callbackasm1(SB) + MOVD $1414, R12 + B callbackasm1(SB) + MOVD $1415, R12 + B callbackasm1(SB) + MOVD $1416, R12 + B callbackasm1(SB) + MOVD $1417, R12 + B callbackasm1(SB) + MOVD $1418, R12 + B callbackasm1(SB) + MOVD $1419, R12 + B callbackasm1(SB) + MOVD $1420, R12 + B callbackasm1(SB) + MOVD $1421, R12 + B callbackasm1(SB) + MOVD $1422, R12 + B callbackasm1(SB) + MOVD $1423, R12 + B callbackasm1(SB) + MOVD $1424, R12 + B callbackasm1(SB) + MOVD $1425, R12 + B callbackasm1(SB) + MOVD $1426, R12 + B callbackasm1(SB) + MOVD $1427, R12 + B callbackasm1(SB) + MOVD $1428, R12 + B callbackasm1(SB) + MOVD $1429, R12 + B callbackasm1(SB) + MOVD $1430, R12 + B callbackasm1(SB) + MOVD $1431, R12 + B callbackasm1(SB) + MOVD $1432, R12 + B callbackasm1(SB) + MOVD $1433, R12 + B callbackasm1(SB) + MOVD $1434, R12 + B callbackasm1(SB) + MOVD $1435, R12 + B callbackasm1(SB) + MOVD $1436, R12 + B callbackasm1(SB) + MOVD $1437, R12 + B callbackasm1(SB) + MOVD $1438, R12 + B callbackasm1(SB) + MOVD $1439, R12 + B callbackasm1(SB) + MOVD $1440, R12 + B callbackasm1(SB) + MOVD $1441, R12 + B callbackasm1(SB) + MOVD $1442, R12 + B callbackasm1(SB) + MOVD $1443, R12 + B callbackasm1(SB) + MOVD $1444, R12 + B callbackasm1(SB) + MOVD $1445, R12 + B callbackasm1(SB) + MOVD $1446, R12 + B callbackasm1(SB) + MOVD $1447, R12 + B callbackasm1(SB) + MOVD $1448, R12 + B callbackasm1(SB) + MOVD $1449, R12 + B callbackasm1(SB) + MOVD $1450, R12 + B callbackasm1(SB) + MOVD $1451, R12 + B callbackasm1(SB) + MOVD $1452, R12 + B callbackasm1(SB) + MOVD $1453, R12 + B callbackasm1(SB) + MOVD $1454, R12 + B callbackasm1(SB) + MOVD $1455, R12 + B callbackasm1(SB) + MOVD $1456, R12 + B callbackasm1(SB) + MOVD $1457, R12 + B callbackasm1(SB) + MOVD $1458, R12 + B callbackasm1(SB) + MOVD $1459, R12 + B callbackasm1(SB) + MOVD $1460, R12 + B callbackasm1(SB) + MOVD $1461, R12 + B callbackasm1(SB) + MOVD $1462, R12 + B callbackasm1(SB) + MOVD $1463, R12 + B callbackasm1(SB) + MOVD $1464, R12 + B callbackasm1(SB) + MOVD $1465, R12 + B callbackasm1(SB) + MOVD $1466, R12 + B callbackasm1(SB) + MOVD $1467, R12 + B callbackasm1(SB) + MOVD $1468, R12 + B callbackasm1(SB) + MOVD $1469, R12 + B callbackasm1(SB) + MOVD $1470, R12 + B callbackasm1(SB) + MOVD $1471, R12 + B callbackasm1(SB) + MOVD $1472, R12 + B callbackasm1(SB) + MOVD $1473, R12 + B callbackasm1(SB) + MOVD $1474, R12 + B callbackasm1(SB) + MOVD $1475, R12 + B callbackasm1(SB) + MOVD $1476, R12 + B callbackasm1(SB) + MOVD $1477, R12 + B callbackasm1(SB) + MOVD $1478, R12 + B callbackasm1(SB) + MOVD $1479, R12 + B callbackasm1(SB) + MOVD $1480, R12 + B callbackasm1(SB) + MOVD $1481, R12 + B callbackasm1(SB) + MOVD $1482, R12 + B callbackasm1(SB) + MOVD $1483, R12 + B callbackasm1(SB) + MOVD $1484, R12 + B callbackasm1(SB) + MOVD $1485, R12 + B callbackasm1(SB) + MOVD $1486, R12 + B callbackasm1(SB) + MOVD $1487, R12 + B callbackasm1(SB) + MOVD $1488, R12 + B callbackasm1(SB) + MOVD $1489, R12 + B callbackasm1(SB) + MOVD $1490, R12 + B callbackasm1(SB) + MOVD $1491, R12 + B callbackasm1(SB) + MOVD $1492, R12 + B callbackasm1(SB) + MOVD $1493, R12 + B callbackasm1(SB) + MOVD $1494, R12 + B callbackasm1(SB) + MOVD $1495, R12 + B callbackasm1(SB) + MOVD $1496, R12 + B callbackasm1(SB) + MOVD $1497, R12 + B callbackasm1(SB) + MOVD $1498, R12 + B callbackasm1(SB) + MOVD $1499, R12 + B callbackasm1(SB) + MOVD $1500, R12 + B callbackasm1(SB) + MOVD $1501, R12 + B callbackasm1(SB) + MOVD $1502, R12 + B callbackasm1(SB) + MOVD $1503, R12 + B callbackasm1(SB) + MOVD $1504, R12 + B callbackasm1(SB) + MOVD $1505, R12 + B callbackasm1(SB) + MOVD $1506, R12 + B callbackasm1(SB) + MOVD $1507, R12 + B callbackasm1(SB) + MOVD $1508, R12 + B callbackasm1(SB) + MOVD $1509, R12 + B callbackasm1(SB) + MOVD $1510, R12 + B callbackasm1(SB) + MOVD $1511, R12 + B callbackasm1(SB) + MOVD $1512, R12 + B callbackasm1(SB) + MOVD $1513, R12 + B callbackasm1(SB) + MOVD $1514, R12 + B callbackasm1(SB) + MOVD $1515, R12 + B callbackasm1(SB) + MOVD $1516, R12 + B callbackasm1(SB) + MOVD $1517, R12 + B callbackasm1(SB) + MOVD $1518, R12 + B callbackasm1(SB) + MOVD $1519, R12 + B callbackasm1(SB) + MOVD $1520, R12 + B callbackasm1(SB) + MOVD $1521, R12 + B callbackasm1(SB) + MOVD $1522, R12 + B callbackasm1(SB) + MOVD $1523, R12 + B callbackasm1(SB) + MOVD $1524, R12 + B callbackasm1(SB) + MOVD $1525, R12 + B callbackasm1(SB) + MOVD $1526, R12 + B callbackasm1(SB) + MOVD $1527, R12 + B callbackasm1(SB) + MOVD $1528, R12 + B callbackasm1(SB) + MOVD $1529, R12 + B callbackasm1(SB) + MOVD $1530, R12 + B callbackasm1(SB) + MOVD $1531, R12 + B callbackasm1(SB) + MOVD $1532, R12 + B callbackasm1(SB) + MOVD $1533, R12 + B callbackasm1(SB) + MOVD $1534, R12 + B callbackasm1(SB) + MOVD $1535, R12 + B callbackasm1(SB) + MOVD $1536, R12 + B callbackasm1(SB) + MOVD $1537, R12 + B callbackasm1(SB) + MOVD $1538, R12 + B callbackasm1(SB) + MOVD $1539, R12 + B callbackasm1(SB) + MOVD $1540, R12 + B callbackasm1(SB) + MOVD $1541, R12 + B callbackasm1(SB) + MOVD $1542, R12 + B callbackasm1(SB) + MOVD $1543, R12 + B callbackasm1(SB) + MOVD $1544, R12 + B callbackasm1(SB) + MOVD $1545, R12 + B callbackasm1(SB) + MOVD $1546, R12 + B callbackasm1(SB) + MOVD $1547, R12 + B callbackasm1(SB) + MOVD $1548, R12 + B callbackasm1(SB) + MOVD $1549, R12 + B callbackasm1(SB) + MOVD $1550, R12 + B callbackasm1(SB) + MOVD $1551, R12 + B callbackasm1(SB) + MOVD $1552, R12 + B callbackasm1(SB) + MOVD $1553, R12 + B callbackasm1(SB) + MOVD $1554, R12 + B callbackasm1(SB) + MOVD $1555, R12 + B callbackasm1(SB) + MOVD $1556, R12 + B callbackasm1(SB) + MOVD $1557, R12 + B callbackasm1(SB) + MOVD $1558, R12 + B callbackasm1(SB) + MOVD $1559, R12 + B callbackasm1(SB) + MOVD $1560, R12 + B callbackasm1(SB) + MOVD $1561, R12 + B callbackasm1(SB) + MOVD $1562, R12 + B callbackasm1(SB) + MOVD $1563, R12 + B callbackasm1(SB) + MOVD $1564, R12 + B callbackasm1(SB) + MOVD $1565, R12 + B callbackasm1(SB) + MOVD $1566, R12 + B callbackasm1(SB) + MOVD $1567, R12 + B callbackasm1(SB) + MOVD $1568, R12 + B callbackasm1(SB) + MOVD $1569, R12 + B callbackasm1(SB) + MOVD $1570, R12 + B callbackasm1(SB) + MOVD $1571, R12 + B callbackasm1(SB) + MOVD $1572, R12 + B callbackasm1(SB) + MOVD $1573, R12 + B callbackasm1(SB) + MOVD $1574, R12 + B callbackasm1(SB) + MOVD $1575, R12 + B callbackasm1(SB) + MOVD $1576, R12 + B callbackasm1(SB) + MOVD $1577, R12 + B callbackasm1(SB) + MOVD $1578, R12 + B callbackasm1(SB) + MOVD $1579, R12 + B callbackasm1(SB) + MOVD $1580, R12 + B callbackasm1(SB) + MOVD $1581, R12 + B callbackasm1(SB) + MOVD $1582, R12 + B callbackasm1(SB) + MOVD $1583, R12 + B callbackasm1(SB) + MOVD $1584, R12 + B callbackasm1(SB) + MOVD $1585, R12 + B callbackasm1(SB) + MOVD $1586, R12 + B callbackasm1(SB) + MOVD $1587, R12 + B callbackasm1(SB) + MOVD $1588, R12 + B callbackasm1(SB) + MOVD $1589, R12 + B callbackasm1(SB) + MOVD $1590, R12 + B callbackasm1(SB) + MOVD $1591, R12 + B callbackasm1(SB) + MOVD $1592, R12 + B callbackasm1(SB) + MOVD $1593, R12 + B callbackasm1(SB) + MOVD $1594, R12 + B callbackasm1(SB) + MOVD $1595, R12 + B callbackasm1(SB) + MOVD $1596, R12 + B callbackasm1(SB) + MOVD $1597, R12 + B callbackasm1(SB) + MOVD $1598, R12 + B callbackasm1(SB) + MOVD $1599, R12 + B callbackasm1(SB) + MOVD $1600, R12 + B callbackasm1(SB) + MOVD $1601, R12 + B callbackasm1(SB) + MOVD $1602, R12 + B callbackasm1(SB) + MOVD $1603, R12 + B callbackasm1(SB) + MOVD $1604, R12 + B callbackasm1(SB) + MOVD $1605, R12 + B callbackasm1(SB) + MOVD $1606, R12 + B callbackasm1(SB) + MOVD $1607, R12 + B callbackasm1(SB) + MOVD $1608, R12 + B callbackasm1(SB) + MOVD $1609, R12 + B callbackasm1(SB) + MOVD $1610, R12 + B callbackasm1(SB) + MOVD $1611, R12 + B callbackasm1(SB) + MOVD $1612, R12 + B callbackasm1(SB) + MOVD $1613, R12 + B callbackasm1(SB) + MOVD $1614, R12 + B callbackasm1(SB) + MOVD $1615, R12 + B callbackasm1(SB) + MOVD $1616, R12 + B callbackasm1(SB) + MOVD $1617, R12 + B callbackasm1(SB) + MOVD $1618, R12 + B callbackasm1(SB) + MOVD $1619, R12 + B callbackasm1(SB) + MOVD $1620, R12 + B callbackasm1(SB) + MOVD $1621, R12 + B callbackasm1(SB) + MOVD $1622, R12 + B callbackasm1(SB) + MOVD $1623, R12 + B callbackasm1(SB) + MOVD $1624, R12 + B callbackasm1(SB) + MOVD $1625, R12 + B callbackasm1(SB) + MOVD $1626, R12 + B callbackasm1(SB) + MOVD $1627, R12 + B callbackasm1(SB) + MOVD $1628, R12 + B callbackasm1(SB) + MOVD $1629, R12 + B callbackasm1(SB) + MOVD $1630, R12 + B callbackasm1(SB) + MOVD $1631, R12 + B callbackasm1(SB) + MOVD $1632, R12 + B callbackasm1(SB) + MOVD $1633, R12 + B callbackasm1(SB) + MOVD $1634, R12 + B callbackasm1(SB) + MOVD $1635, R12 + B callbackasm1(SB) + MOVD $1636, R12 + B callbackasm1(SB) + MOVD $1637, R12 + B callbackasm1(SB) + MOVD $1638, R12 + B callbackasm1(SB) + MOVD $1639, R12 + B callbackasm1(SB) + MOVD $1640, R12 + B callbackasm1(SB) + MOVD $1641, R12 + B callbackasm1(SB) + MOVD $1642, R12 + B callbackasm1(SB) + MOVD $1643, R12 + B callbackasm1(SB) + MOVD $1644, R12 + B callbackasm1(SB) + MOVD $1645, R12 + B callbackasm1(SB) + MOVD $1646, R12 + B callbackasm1(SB) + MOVD $1647, R12 + B callbackasm1(SB) + MOVD $1648, R12 + B callbackasm1(SB) + MOVD $1649, R12 + B callbackasm1(SB) + MOVD $1650, R12 + B callbackasm1(SB) + MOVD $1651, R12 + B callbackasm1(SB) + MOVD $1652, R12 + B callbackasm1(SB) + MOVD $1653, R12 + B callbackasm1(SB) + MOVD $1654, R12 + B callbackasm1(SB) + MOVD $1655, R12 + B callbackasm1(SB) + MOVD $1656, R12 + B callbackasm1(SB) + MOVD $1657, R12 + B callbackasm1(SB) + MOVD $1658, R12 + B callbackasm1(SB) + MOVD $1659, R12 + B callbackasm1(SB) + MOVD $1660, R12 + B callbackasm1(SB) + MOVD $1661, R12 + B callbackasm1(SB) + MOVD $1662, R12 + B callbackasm1(SB) + MOVD $1663, R12 + B callbackasm1(SB) + MOVD $1664, R12 + B callbackasm1(SB) + MOVD $1665, R12 + B callbackasm1(SB) + MOVD $1666, R12 + B callbackasm1(SB) + MOVD $1667, R12 + B callbackasm1(SB) + MOVD $1668, R12 + B callbackasm1(SB) + MOVD $1669, R12 + B callbackasm1(SB) + MOVD $1670, R12 + B callbackasm1(SB) + MOVD $1671, R12 + B callbackasm1(SB) + MOVD $1672, R12 + B callbackasm1(SB) + MOVD $1673, R12 + B callbackasm1(SB) + MOVD $1674, R12 + B callbackasm1(SB) + MOVD $1675, R12 + B callbackasm1(SB) + MOVD $1676, R12 + B callbackasm1(SB) + MOVD $1677, R12 + B callbackasm1(SB) + MOVD $1678, R12 + B callbackasm1(SB) + MOVD $1679, R12 + B callbackasm1(SB) + MOVD $1680, R12 + B callbackasm1(SB) + MOVD $1681, R12 + B callbackasm1(SB) + MOVD $1682, R12 + B callbackasm1(SB) + MOVD $1683, R12 + B callbackasm1(SB) + MOVD $1684, R12 + B callbackasm1(SB) + MOVD $1685, R12 + B callbackasm1(SB) + MOVD $1686, R12 + B callbackasm1(SB) + MOVD $1687, R12 + B callbackasm1(SB) + MOVD $1688, R12 + B callbackasm1(SB) + MOVD $1689, R12 + B callbackasm1(SB) + MOVD $1690, R12 + B callbackasm1(SB) + MOVD $1691, R12 + B callbackasm1(SB) + MOVD $1692, R12 + B callbackasm1(SB) + MOVD $1693, R12 + B callbackasm1(SB) + MOVD $1694, R12 + B callbackasm1(SB) + MOVD $1695, R12 + B callbackasm1(SB) + MOVD $1696, R12 + B callbackasm1(SB) + MOVD $1697, R12 + B callbackasm1(SB) + MOVD $1698, R12 + B callbackasm1(SB) + MOVD $1699, R12 + B callbackasm1(SB) + MOVD $1700, R12 + B callbackasm1(SB) + MOVD $1701, R12 + B callbackasm1(SB) + MOVD $1702, R12 + B callbackasm1(SB) + MOVD $1703, R12 + B callbackasm1(SB) + MOVD $1704, R12 + B callbackasm1(SB) + MOVD $1705, R12 + B callbackasm1(SB) + MOVD $1706, R12 + B callbackasm1(SB) + MOVD $1707, R12 + B callbackasm1(SB) + MOVD $1708, R12 + B callbackasm1(SB) + MOVD $1709, R12 + B callbackasm1(SB) + MOVD $1710, R12 + B callbackasm1(SB) + MOVD $1711, R12 + B callbackasm1(SB) + MOVD $1712, R12 + B callbackasm1(SB) + MOVD $1713, R12 + B callbackasm1(SB) + MOVD $1714, R12 + B callbackasm1(SB) + MOVD $1715, R12 + B callbackasm1(SB) + MOVD $1716, R12 + B callbackasm1(SB) + MOVD $1717, R12 + B callbackasm1(SB) + MOVD $1718, R12 + B callbackasm1(SB) + MOVD $1719, R12 + B callbackasm1(SB) + MOVD $1720, R12 + B callbackasm1(SB) + MOVD $1721, R12 + B callbackasm1(SB) + MOVD $1722, R12 + B callbackasm1(SB) + MOVD $1723, R12 + B callbackasm1(SB) + MOVD $1724, R12 + B callbackasm1(SB) + MOVD $1725, R12 + B callbackasm1(SB) + MOVD $1726, R12 + B callbackasm1(SB) + MOVD $1727, R12 + B callbackasm1(SB) + MOVD $1728, R12 + B callbackasm1(SB) + MOVD $1729, R12 + B callbackasm1(SB) + MOVD $1730, R12 + B callbackasm1(SB) + MOVD $1731, R12 + B callbackasm1(SB) + MOVD $1732, R12 + B callbackasm1(SB) + MOVD $1733, R12 + B callbackasm1(SB) + MOVD $1734, R12 + B callbackasm1(SB) + MOVD $1735, R12 + B callbackasm1(SB) + MOVD $1736, R12 + B callbackasm1(SB) + MOVD $1737, R12 + B callbackasm1(SB) + MOVD $1738, R12 + B callbackasm1(SB) + MOVD $1739, R12 + B callbackasm1(SB) + MOVD $1740, R12 + B callbackasm1(SB) + MOVD $1741, R12 + B callbackasm1(SB) + MOVD $1742, R12 + B callbackasm1(SB) + MOVD $1743, R12 + B callbackasm1(SB) + MOVD $1744, R12 + B callbackasm1(SB) + MOVD $1745, R12 + B callbackasm1(SB) + MOVD $1746, R12 + B callbackasm1(SB) + MOVD $1747, R12 + B callbackasm1(SB) + MOVD $1748, R12 + B callbackasm1(SB) + MOVD $1749, R12 + B callbackasm1(SB) + MOVD $1750, R12 + B callbackasm1(SB) + MOVD $1751, R12 + B callbackasm1(SB) + MOVD $1752, R12 + B callbackasm1(SB) + MOVD $1753, R12 + B callbackasm1(SB) + MOVD $1754, R12 + B callbackasm1(SB) + MOVD $1755, R12 + B callbackasm1(SB) + MOVD $1756, R12 + B callbackasm1(SB) + MOVD $1757, R12 + B callbackasm1(SB) + MOVD $1758, R12 + B callbackasm1(SB) + MOVD $1759, R12 + B callbackasm1(SB) + MOVD $1760, R12 + B callbackasm1(SB) + MOVD $1761, R12 + B callbackasm1(SB) + MOVD $1762, R12 + B callbackasm1(SB) + MOVD $1763, R12 + B callbackasm1(SB) + MOVD $1764, R12 + B callbackasm1(SB) + MOVD $1765, R12 + B callbackasm1(SB) + MOVD $1766, R12 + B callbackasm1(SB) + MOVD $1767, R12 + B callbackasm1(SB) + MOVD $1768, R12 + B callbackasm1(SB) + MOVD $1769, R12 + B callbackasm1(SB) + MOVD $1770, R12 + B callbackasm1(SB) + MOVD $1771, R12 + B callbackasm1(SB) + MOVD $1772, R12 + B callbackasm1(SB) + MOVD $1773, R12 + B callbackasm1(SB) + MOVD $1774, R12 + B callbackasm1(SB) + MOVD $1775, R12 + B callbackasm1(SB) + MOVD $1776, R12 + B callbackasm1(SB) + MOVD $1777, R12 + B callbackasm1(SB) + MOVD $1778, R12 + B callbackasm1(SB) + MOVD $1779, R12 + B callbackasm1(SB) + MOVD $1780, R12 + B callbackasm1(SB) + MOVD $1781, R12 + B callbackasm1(SB) + MOVD $1782, R12 + B callbackasm1(SB) + MOVD $1783, R12 + B callbackasm1(SB) + MOVD $1784, R12 + B callbackasm1(SB) + MOVD $1785, R12 + B callbackasm1(SB) + MOVD $1786, R12 + B callbackasm1(SB) + MOVD $1787, R12 + B callbackasm1(SB) + MOVD $1788, R12 + B callbackasm1(SB) + MOVD $1789, R12 + B callbackasm1(SB) + MOVD $1790, R12 + B callbackasm1(SB) + MOVD $1791, R12 + B callbackasm1(SB) + MOVD $1792, R12 + B callbackasm1(SB) + MOVD $1793, R12 + B callbackasm1(SB) + MOVD $1794, R12 + B callbackasm1(SB) + MOVD $1795, R12 + B callbackasm1(SB) + MOVD $1796, R12 + B callbackasm1(SB) + MOVD $1797, R12 + B callbackasm1(SB) + MOVD $1798, R12 + B callbackasm1(SB) + MOVD $1799, R12 + B callbackasm1(SB) + MOVD $1800, R12 + B callbackasm1(SB) + MOVD $1801, R12 + B callbackasm1(SB) + MOVD $1802, R12 + B callbackasm1(SB) + MOVD $1803, R12 + B callbackasm1(SB) + MOVD $1804, R12 + B callbackasm1(SB) + MOVD $1805, R12 + B callbackasm1(SB) + MOVD $1806, R12 + B callbackasm1(SB) + MOVD $1807, R12 + B callbackasm1(SB) + MOVD $1808, R12 + B callbackasm1(SB) + MOVD $1809, R12 + B callbackasm1(SB) + MOVD $1810, R12 + B callbackasm1(SB) + MOVD $1811, R12 + B callbackasm1(SB) + MOVD $1812, R12 + B callbackasm1(SB) + MOVD $1813, R12 + B callbackasm1(SB) + MOVD $1814, R12 + B callbackasm1(SB) + MOVD $1815, R12 + B callbackasm1(SB) + MOVD $1816, R12 + B callbackasm1(SB) + MOVD $1817, R12 + B callbackasm1(SB) + MOVD $1818, R12 + B callbackasm1(SB) + MOVD $1819, R12 + B callbackasm1(SB) + MOVD $1820, R12 + B callbackasm1(SB) + MOVD $1821, R12 + B callbackasm1(SB) + MOVD $1822, R12 + B callbackasm1(SB) + MOVD $1823, R12 + B callbackasm1(SB) + MOVD $1824, R12 + B callbackasm1(SB) + MOVD $1825, R12 + B callbackasm1(SB) + MOVD $1826, R12 + B callbackasm1(SB) + MOVD $1827, R12 + B callbackasm1(SB) + MOVD $1828, R12 + B callbackasm1(SB) + MOVD $1829, R12 + B callbackasm1(SB) + MOVD $1830, R12 + B callbackasm1(SB) + MOVD $1831, R12 + B callbackasm1(SB) + MOVD $1832, R12 + B callbackasm1(SB) + MOVD $1833, R12 + B callbackasm1(SB) + MOVD $1834, R12 + B callbackasm1(SB) + MOVD $1835, R12 + B callbackasm1(SB) + MOVD $1836, R12 + B callbackasm1(SB) + MOVD $1837, R12 + B callbackasm1(SB) + MOVD $1838, R12 + B callbackasm1(SB) + MOVD $1839, R12 + B callbackasm1(SB) + MOVD $1840, R12 + B callbackasm1(SB) + MOVD $1841, R12 + B callbackasm1(SB) + MOVD $1842, R12 + B callbackasm1(SB) + MOVD $1843, R12 + B callbackasm1(SB) + MOVD $1844, R12 + B callbackasm1(SB) + MOVD $1845, R12 + B callbackasm1(SB) + MOVD $1846, R12 + B callbackasm1(SB) + MOVD $1847, R12 + B callbackasm1(SB) + MOVD $1848, R12 + B callbackasm1(SB) + MOVD $1849, R12 + B callbackasm1(SB) + MOVD $1850, R12 + B callbackasm1(SB) + MOVD $1851, R12 + B callbackasm1(SB) + MOVD $1852, R12 + B callbackasm1(SB) + MOVD $1853, R12 + B callbackasm1(SB) + MOVD $1854, R12 + B callbackasm1(SB) + MOVD $1855, R12 + B callbackasm1(SB) + MOVD $1856, R12 + B callbackasm1(SB) + MOVD $1857, R12 + B callbackasm1(SB) + MOVD $1858, R12 + B callbackasm1(SB) + MOVD $1859, R12 + B callbackasm1(SB) + MOVD $1860, R12 + B callbackasm1(SB) + MOVD $1861, R12 + B callbackasm1(SB) + MOVD $1862, R12 + B callbackasm1(SB) + MOVD $1863, R12 + B callbackasm1(SB) + MOVD $1864, R12 + B callbackasm1(SB) + MOVD $1865, R12 + B callbackasm1(SB) + MOVD $1866, R12 + B callbackasm1(SB) + MOVD $1867, R12 + B callbackasm1(SB) + MOVD $1868, R12 + B callbackasm1(SB) + MOVD $1869, R12 + B callbackasm1(SB) + MOVD $1870, R12 + B callbackasm1(SB) + MOVD $1871, R12 + B callbackasm1(SB) + MOVD $1872, R12 + B callbackasm1(SB) + MOVD $1873, R12 + B callbackasm1(SB) + MOVD $1874, R12 + B callbackasm1(SB) + MOVD $1875, R12 + B callbackasm1(SB) + MOVD $1876, R12 + B callbackasm1(SB) + MOVD $1877, R12 + B callbackasm1(SB) + MOVD $1878, R12 + B callbackasm1(SB) + MOVD $1879, R12 + B callbackasm1(SB) + MOVD $1880, R12 + B callbackasm1(SB) + MOVD $1881, R12 + B callbackasm1(SB) + MOVD $1882, R12 + B callbackasm1(SB) + MOVD $1883, R12 + B callbackasm1(SB) + MOVD $1884, R12 + B callbackasm1(SB) + MOVD $1885, R12 + B callbackasm1(SB) + MOVD $1886, R12 + B callbackasm1(SB) + MOVD $1887, R12 + B callbackasm1(SB) + MOVD $1888, R12 + B callbackasm1(SB) + MOVD $1889, R12 + B callbackasm1(SB) + MOVD $1890, R12 + B callbackasm1(SB) + MOVD $1891, R12 + B callbackasm1(SB) + MOVD $1892, R12 + B callbackasm1(SB) + MOVD $1893, R12 + B callbackasm1(SB) + MOVD $1894, R12 + B callbackasm1(SB) + MOVD $1895, R12 + B callbackasm1(SB) + MOVD $1896, R12 + B callbackasm1(SB) + MOVD $1897, R12 + B callbackasm1(SB) + MOVD $1898, R12 + B callbackasm1(SB) + MOVD $1899, R12 + B callbackasm1(SB) + MOVD $1900, R12 + B callbackasm1(SB) + MOVD $1901, R12 + B callbackasm1(SB) + MOVD $1902, R12 + B callbackasm1(SB) + MOVD $1903, R12 + B callbackasm1(SB) + MOVD $1904, R12 + B callbackasm1(SB) + MOVD $1905, R12 + B callbackasm1(SB) + MOVD $1906, R12 + B callbackasm1(SB) + MOVD $1907, R12 + B callbackasm1(SB) + MOVD $1908, R12 + B callbackasm1(SB) + MOVD $1909, R12 + B callbackasm1(SB) + MOVD $1910, R12 + B callbackasm1(SB) + MOVD $1911, R12 + B callbackasm1(SB) + MOVD $1912, R12 + B callbackasm1(SB) + MOVD $1913, R12 + B callbackasm1(SB) + MOVD $1914, R12 + B callbackasm1(SB) + MOVD $1915, R12 + B callbackasm1(SB) + MOVD $1916, R12 + B callbackasm1(SB) + MOVD $1917, R12 + B callbackasm1(SB) + MOVD $1918, R12 + B callbackasm1(SB) + MOVD $1919, R12 + B callbackasm1(SB) + MOVD $1920, R12 + B callbackasm1(SB) + MOVD $1921, R12 + B callbackasm1(SB) + MOVD $1922, R12 + B callbackasm1(SB) + MOVD $1923, R12 + B callbackasm1(SB) + MOVD $1924, R12 + B callbackasm1(SB) + MOVD $1925, R12 + B callbackasm1(SB) + MOVD $1926, R12 + B callbackasm1(SB) + MOVD $1927, R12 + B callbackasm1(SB) + MOVD $1928, R12 + B callbackasm1(SB) + MOVD $1929, R12 + B callbackasm1(SB) + MOVD $1930, R12 + B callbackasm1(SB) + MOVD $1931, R12 + B callbackasm1(SB) + MOVD $1932, R12 + B callbackasm1(SB) + MOVD $1933, R12 + B callbackasm1(SB) + MOVD $1934, R12 + B callbackasm1(SB) + MOVD $1935, R12 + B callbackasm1(SB) + MOVD $1936, R12 + B callbackasm1(SB) + MOVD $1937, R12 + B callbackasm1(SB) + MOVD $1938, R12 + B callbackasm1(SB) + MOVD $1939, R12 + B callbackasm1(SB) + MOVD $1940, R12 + B callbackasm1(SB) + MOVD $1941, R12 + B callbackasm1(SB) + MOVD $1942, R12 + B callbackasm1(SB) + MOVD $1943, R12 + B callbackasm1(SB) + MOVD $1944, R12 + B callbackasm1(SB) + MOVD $1945, R12 + B callbackasm1(SB) + MOVD $1946, R12 + B callbackasm1(SB) + MOVD $1947, R12 + B callbackasm1(SB) + MOVD $1948, R12 + B callbackasm1(SB) + MOVD $1949, R12 + B callbackasm1(SB) + MOVD $1950, R12 + B callbackasm1(SB) + MOVD $1951, R12 + B callbackasm1(SB) + MOVD $1952, R12 + B callbackasm1(SB) + MOVD $1953, R12 + B callbackasm1(SB) + MOVD $1954, R12 + B callbackasm1(SB) + MOVD $1955, R12 + B callbackasm1(SB) + MOVD $1956, R12 + B callbackasm1(SB) + MOVD $1957, R12 + B callbackasm1(SB) + MOVD $1958, R12 + B callbackasm1(SB) + MOVD $1959, R12 + B callbackasm1(SB) + MOVD $1960, R12 + B callbackasm1(SB) + MOVD $1961, R12 + B callbackasm1(SB) + MOVD $1962, R12 + B callbackasm1(SB) + MOVD $1963, R12 + B callbackasm1(SB) + MOVD $1964, R12 + B callbackasm1(SB) + MOVD $1965, R12 + B callbackasm1(SB) + MOVD $1966, R12 + B callbackasm1(SB) + MOVD $1967, R12 + B callbackasm1(SB) + MOVD $1968, R12 + B callbackasm1(SB) + MOVD $1969, R12 + B callbackasm1(SB) + MOVD $1970, R12 + B callbackasm1(SB) + MOVD $1971, R12 + B callbackasm1(SB) + MOVD $1972, R12 + B callbackasm1(SB) + MOVD $1973, R12 + B callbackasm1(SB) + MOVD $1974, R12 + B callbackasm1(SB) + MOVD $1975, R12 + B callbackasm1(SB) + MOVD $1976, R12 + B callbackasm1(SB) + MOVD $1977, R12 + B callbackasm1(SB) + MOVD $1978, R12 + B callbackasm1(SB) + MOVD $1979, R12 + B callbackasm1(SB) + MOVD $1980, R12 + B callbackasm1(SB) + MOVD $1981, R12 + B callbackasm1(SB) + MOVD $1982, R12 + B callbackasm1(SB) + MOVD $1983, R12 + B callbackasm1(SB) + MOVD $1984, R12 + B callbackasm1(SB) + MOVD $1985, R12 + B callbackasm1(SB) + MOVD $1986, R12 + B callbackasm1(SB) + MOVD $1987, R12 + B callbackasm1(SB) + MOVD $1988, R12 + B callbackasm1(SB) + MOVD $1989, R12 + B callbackasm1(SB) + MOVD $1990, R12 + B callbackasm1(SB) + MOVD $1991, R12 + B callbackasm1(SB) + MOVD $1992, R12 + B callbackasm1(SB) + MOVD $1993, R12 + B callbackasm1(SB) + MOVD $1994, R12 + B callbackasm1(SB) + MOVD $1995, R12 + B callbackasm1(SB) + MOVD $1996, R12 + B callbackasm1(SB) + MOVD $1997, R12 + B callbackasm1(SB) + MOVD $1998, R12 + B callbackasm1(SB) + MOVD $1999, R12 + B callbackasm1(SB) diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml index a22953805c6..4528059ca68 100644 --- a/vendor/github.com/klauspost/compress/.goreleaser.yml +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -1,5 +1,5 @@ -# This is an example goreleaser.yaml file with some sane defaults. -# Make sure to check the documentation at http://goreleaser.com +version: 2 + before: hooks: - ./gen.sh @@ -99,7 +99,7 @@ archives: checksum: name_template: 'checksums.txt' snapshot: - name_template: "{{ .Tag }}-next" + version_template: "{{ .Tag }}-next" changelog: sort: asc filters: diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 684a30853ab..de264c85a5a 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -16,6 +16,13 @@ This package provides various compression algorithms. # changelog +* Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10) + * gzhttp: Add TransportAlwaysDecompress option. https://github.com/klauspost/compress/pull/978 + * gzhttp: Add supported decompress request body by @mirecl in https://github.com/klauspost/compress/pull/1002 + * s2: Add EncodeBuffer buffer recycling callback https://github.com/klauspost/compress/pull/982 + * zstd: Improve memory usage on small streaming encodes https://github.com/klauspost/compress/pull/1007 + * flate: read data written with partial flush by @vajexal in https://github.com/klauspost/compress/pull/996 + * Jun 12th, 2024 - [1.17.9](https://github.com/klauspost/compress/releases/tag/v1.17.9) * s2: Reduce ReadFrom temporary allocations https://github.com/klauspost/compress/pull/949 * flate, zstd: Shave some bytes off amd64 matchLen by @greatroar in https://github.com/klauspost/compress/pull/963 diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go index 0c9088adfee..20b802270a7 100644 --- a/vendor/github.com/klauspost/compress/s2/encode.go +++ b/vendor/github.com/klauspost/compress/s2/encode.go @@ -9,6 +9,9 @@ import ( "encoding/binary" "math" "math/bits" + "sync" + + "github.com/klauspost/compress/internal/race" ) // Encode returns the encoded form of src. The returned slice may be a sub- @@ -52,6 +55,8 @@ func Encode(dst, src []byte) []byte { return dst[:d] } +var estblockPool [2]sync.Pool + // EstimateBlockSize will perform a very fast compression // without outputting the result and return the compressed output size. // The function returns -1 if no improvement could be achieved. @@ -61,9 +66,25 @@ func EstimateBlockSize(src []byte) (d int) { return -1 } if len(src) <= 1024 { - d = calcBlockSizeSmall(src) + const sz, pool = 2048, 0 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSizeSmall(src, tmp) } else { - d = calcBlockSize(src) + const sz, pool = 32768, 1 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSize(src, tmp) } if d == 0 { diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go index 4f45206a4ef..7aadd255fe3 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encode_amd64.go @@ -3,10 +3,16 @@ package s2 -import "github.com/klauspost/compress/internal/race" +import ( + "sync" + + "github.com/klauspost/compress/internal/race" +) const hasAmd64Asm = true +var encPools [4]sync.Pool + // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. @@ -29,23 +35,60 @@ func encodeBlock(dst, src []byte) (d int) { ) if len(src) >= 4<<20 { - return encodeBlockAsm(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBlockAsm4MB(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm8B(dst, src, tmp) } +var encBetterPools [5]sync.Pool + // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. @@ -68,21 +111,59 @@ func encodeBlockBetter(dst, src []byte) (d int) { ) if len(src) > 4<<20 { - return encodeBetterBlockAsm(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBetterBlockAsm4MB(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -105,22 +186,57 @@ func encodeBlockSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeSnappyBlockAsm64K(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -143,20 +259,59 @@ func encodeBlockBetterSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBetterBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm(dst, src, tmp) } + if len(src) >= limit12B { - return encodeSnappyBetterBlockAsm64K(dst, src) + const sz, pool = 294912, 4 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm8B(dst, src, tmp) } diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go index 6b393c34d37..dd1c973ca51 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_go.go +++ b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -317,7 +317,7 @@ func matchLen(a []byte, b []byte) int { } // input must be > inputMargin -func calcBlockSize(src []byte) (d int) { +func calcBlockSize(src []byte, _ *[32768]byte) (d int) { // Initialize the hash table. const ( tableBits = 13 @@ -503,7 +503,7 @@ emitRemainder: } // length must be > inputMargin. -func calcBlockSizeSmall(src []byte) (d int) { +func calcBlockSizeSmall(src []byte, _ *[2048]byte) (d int) { // Initialize the hash table. const ( tableBits = 9 diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go index 297e41501ba..f43aa815435 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go @@ -11,154 +11,154 @@ func _dummy_() // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm(dst []byte, src []byte) int +func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm4MB(dst []byte, src []byte) int +func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm12B(dst []byte, src []byte) int +func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm10B(dst []byte, src []byte) int +func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm8B(dst []byte, src []byte) int +func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm(dst []byte, src []byte) int +func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm12B(dst []byte, src []byte) int +func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm10B(dst []byte, src []byte) int +func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm8B(dst []byte, src []byte) int +func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm(dst []byte, src []byte) int +func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // calcBlockSize encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSize(src []byte) int +func calcBlockSize(src []byte, tmp *[32768]byte) int // calcBlockSizeSmall encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 1024 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSizeSmall(src []byte) int +func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // emitLiteral writes a literal chunk and returns the number of bytes written. // diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 2ff5b334017..df9be687be7 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -13,1270 +13,1271 @@ TEXT ·_dummy_(SB), $0 #endif RET -// func encodeBlockAsm(dst []byte, src []byte) int +// func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm repeat_extend_back_loop_encodeBlockAsm: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm repeat_extend_back_end_encodeBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeBlockAsm four_bytes_repeat_emit_encodeBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm three_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm two_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm JMP memmove_long_repeat_emit_encodeBlockAsm one_byte_repeat_emit_encodeBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm memmove_long_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm matchlen_bsf_16repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match8_repeat_extend_encodeBlockAsm: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm matchlen_bsf_8_repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match4_repeat_extend_encodeBlockAsm: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm JB repeat_extend_forward_end_encodeBlockAsm - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm matchlen_match1_repeat_extend_encodeBlockAsm: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_match_repeat_encodeBlockAsm: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm cant_repeat_two_offset_match_repeat_encodeBlockAsm: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_match_repeat_encodeBlockAsm - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_repeat_encodeBlockAsm repeat_five_match_repeat_encodeBlockAsm: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_match_repeat_encodeBlockAsm: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_match_repeat_encodeBlockAsm: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_match_repeat_encodeBlockAsm: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_match_repeat_encodeBlockAsm: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_as_copy_encodeBlockAsm: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm four_bytes_remain_repeat_as_copy_encodeBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_repeat_as_copy_encodeBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - MOVL SI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + MOVL DI, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm long_offset_short_repeat_as_copy_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_short_repeat_as_copy_encodeBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm emit_copy_three_repeat_as_copy_encodeBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm no_repeat_found_encodeBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm candidate3_match_encodeBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm candidate2_match_encodeBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm match_extend_back_loop_encodeBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm JMP match_extend_back_loop_encodeBlockAsm match_extend_back_end_encodeBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBlockAsm four_bytes_match_emit_encodeBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm three_bytes_match_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm two_bytes_match_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm JMP memmove_long_match_emit_encodeBlockAsm one_byte_match_emit_encodeBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm memmove_long_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm: match_nolit_loop_encodeBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm matchlen_bsf_16match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match8_match_nolit_encodeBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm matchlen_bsf_8_match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match4_match_nolit_encodeBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm JB match_nolit_end_encodeBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm matchlen_match1_match_nolit_encodeBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy repeat_five_match_nolit_encodeBlockAsm_emit_copy: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm four_bytes_remain_match_nolit_encodeBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_match_nolit_encodeBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - MOVL BX, DI - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + MOVL SI, R8 + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm long_offset_short_match_nolit_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_short_match_nolit_encodeBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm emit_copy_three_match_nolit_encodeBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm - INCL CX + INCL DX JMP search_loop_encodeBlockAsm emit_remainder_encodeBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm @@ -1286,41 +1287,41 @@ emit_remainder_ok_encodeBlockAsm: JB three_bytes_emit_remainder_encodeBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBlockAsm four_bytes_emit_remainder_encodeBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm three_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm two_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm JMP memmove_long_emit_remainder_encodeBlockAsm one_byte_emit_remainder_encodeBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -1336,73 +1337,73 @@ memmove_emit_remainder_encodeBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm memmove_long_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -1416,1199 +1417,1200 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm4MB(dst []byte, src []byte) int +// func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm4MB(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm4MB - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm4MB repeat_extend_back_loop_encodeBlockAsm4MB: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm4MB - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm4MB repeat_extend_back_end_encodeBlockAsm4MB: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 4(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 4(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB three_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB two_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm4MB JMP memmove_long_repeat_emit_encodeBlockAsm4MB one_byte_repeat_emit_encodeBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB memmove_long_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm4MB: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4MB - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB matchlen_bsf_16repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match8_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm4MB matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match4_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm4MB - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm4MB - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm4MB JB repeat_extend_forward_end_encodeBlockAsm4MB - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm4MB - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match1_repeat_extend_encodeBlockAsm4MB: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm4MB: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm4MB - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm4MB cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm4MB - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm4MB - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_match_repeat_encodeBlockAsm4MB: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_match_repeat_encodeBlockAsm4MB: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_match_repeat_encodeBlockAsm4MB: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_match_repeat_encodeBlockAsm4MB: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_as_copy_encodeBlockAsm4MB: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm4MB - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm4MB - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB long_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB emit_copy_three_repeat_as_copy_encodeBlockAsm4MB: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm4MB: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm4MB no_repeat_found_encodeBlockAsm4MB: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm4MB - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm4MB - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm4MB candidate3_match_encodeBlockAsm4MB: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm4MB candidate2_match_encodeBlockAsm4MB: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm4MB match_extend_back_loop_encodeBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm4MB JMP match_extend_back_loop_encodeBlockAsm4MB match_extend_back_end_encodeBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm4MB: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm4MB - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm4MB three_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm4MB two_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm4MB JMP memmove_long_match_emit_encodeBlockAsm4MB one_byte_match_emit_encodeBlockAsm4MB: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm4MB: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm4MB memmove_long_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm4MB: match_nolit_loop_encodeBlockAsm4MB: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4MB - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm4MB matchlen_bsf_16match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match8_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match4_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm4MB - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm4MB - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm4MB JB match_nolit_end_encodeBlockAsm4MB - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm4MB - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm4MB matchlen_match1_match_nolit_encodeBlockAsm4MB: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm4MB - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm4MB: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm4MB - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm4MB // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB four_bytes_remain_match_nolit_encodeBlockAsm4MB: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm4MB - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_match_nolit_encodeBlockAsm4MB: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm4MB - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 - + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 + // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB long_offset_short_match_nolit_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_short_match_nolit_encodeBlockAsm4MB: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB emit_copy_three_match_nolit_encodeBlockAsm4MB: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm4MB: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm4MB - INCL CX + INCL DX JMP search_loop_encodeBlockAsm4MB emit_remainder_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm4MB @@ -2618,33 +2620,33 @@ emit_remainder_ok_encodeBlockAsm4MB: JB three_bytes_emit_remainder_encodeBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB three_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB two_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm4MB JMP memmove_long_emit_remainder_encodeBlockAsm4MB one_byte_emit_remainder_encodeBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -2660,73 +2662,73 @@ memmove_emit_remainder_encodeBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB memmove_long_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -2740,967 +2742,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm12B(dst []byte, src []byte) int +// func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm12B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm12B JB three_bytes_repeat_emit_encodeBlockAsm12B three_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm12B JMP memmove_long_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm12B memmove_long_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm12B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm12B matchlen_bsf_16repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match8_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm12B matchlen_bsf_8_repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match4_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm12B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm12B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm12B JB repeat_extend_forward_end_encodeBlockAsm12B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_match1_repeat_extend_encodeBlockAsm12B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm12B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm12B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm12B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm12B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_encodeBlockAsm12B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_encodeBlockAsm12B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_encodeBlockAsm12B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm12B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B long_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm12B candidate3_match_encodeBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm12B JB three_bytes_match_emit_encodeBlockAsm12B three_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm12B JMP memmove_long_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm12B memmove_long_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm12B: match_nolit_loop_encodeBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm12B matchlen_bsf_16match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match8_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm12B matchlen_bsf_8_match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match4_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm12B JB match_nolit_end_encodeBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm12B matchlen_match1_match_nolit_encodeBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm12B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B long_offset_short_match_nolit_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBlockAsm12B + two_byte_offset_short_match_nolit_encodeBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm12B @@ -3709,26 +3712,26 @@ emit_remainder_ok_encodeBlockAsm12B: JB three_bytes_emit_remainder_encodeBlockAsm12B three_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm12B JMP memmove_long_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -3744,73 +3747,73 @@ memmove_emit_remainder_encodeBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm12B memmove_long_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -3824,967 +3827,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm10B(dst []byte, src []byte) int +// func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm10B repeat_extend_back_loop_encodeBlockAsm10B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm10B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm10B repeat_extend_back_end_encodeBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm10B JB three_bytes_repeat_emit_encodeBlockAsm10B three_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm10B two_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm10B JMP memmove_long_repeat_emit_encodeBlockAsm10B one_byte_repeat_emit_encodeBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm10B memmove_long_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm10B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm10B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm10B matchlen_bsf_16repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match8_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm10B matchlen_bsf_8_repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match4_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm10B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm10B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm10B JB repeat_extend_forward_end_encodeBlockAsm10B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm10B matchlen_match1_repeat_extend_encodeBlockAsm10B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm10B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm10B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm10B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm10B cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm10B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_match_repeat_encodeBlockAsm10B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_match_repeat_encodeBlockAsm10B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_match_repeat_encodeBlockAsm10B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_as_copy_encodeBlockAsm10B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm10B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B long_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B emit_copy_three_repeat_as_copy_encodeBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm10B no_repeat_found_encodeBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm10B candidate3_match_encodeBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm10B candidate2_match_encodeBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm10B match_extend_back_loop_encodeBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm10B JMP match_extend_back_loop_encodeBlockAsm10B match_extend_back_end_encodeBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm10B JB three_bytes_match_emit_encodeBlockAsm10B three_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm10B two_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm10B JMP memmove_long_match_emit_encodeBlockAsm10B one_byte_match_emit_encodeBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm10B memmove_long_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm10B: match_nolit_loop_encodeBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm10B matchlen_bsf_16match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match8_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm10B matchlen_bsf_8_match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match4_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm10B JB match_nolit_end_encodeBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm10B matchlen_match1_match_nolit_encodeBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm10B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B long_offset_short_match_nolit_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B two_byte_offset_short_match_nolit_encodeBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B emit_copy_three_match_nolit_encodeBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm10B emit_remainder_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm10B @@ -4793,26 +4797,26 @@ emit_remainder_ok_encodeBlockAsm10B: JB three_bytes_emit_remainder_encodeBlockAsm10B three_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm10B two_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm10B JMP memmove_long_emit_remainder_encodeBlockAsm10B one_byte_emit_remainder_encodeBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -4828,73 +4832,73 @@ memmove_emit_remainder_encodeBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm10B memmove_long_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -4908,943 +4912,944 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm8B(dst []byte, src []byte) int +// func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 - JNE no_repeat_found_encodeBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 + JNE no_repeat_found_encodeBlockAsm8B + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm8B repeat_extend_back_loop_encodeBlockAsm8B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm8B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm8B repeat_extend_back_end_encodeBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm8B JB three_bytes_repeat_emit_encodeBlockAsm8B three_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm8B two_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm8B JMP memmove_long_repeat_emit_encodeBlockAsm8B one_byte_repeat_emit_encodeBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm8B memmove_long_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm8B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm8B matchlen_bsf_16repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match8_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm8B matchlen_bsf_8_repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match4_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm8B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm8B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm8B JB repeat_extend_forward_end_encodeBlockAsm8B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm8B matchlen_match1_repeat_extend_encodeBlockAsm8B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm8B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm8B // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm8B - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B cant_repeat_two_offset_match_repeat_encodeBlockAsm8B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm8B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_match_repeat_encodeBlockAsm8B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_match_repeat_encodeBlockAsm8B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_as_copy_encodeBlockAsm8B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm8B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B long_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B emit_copy_three_repeat_as_copy_encodeBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm8B no_repeat_found_encodeBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm8B candidate3_match_encodeBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm8B candidate2_match_encodeBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm8B match_extend_back_loop_encodeBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm8B JMP match_extend_back_loop_encodeBlockAsm8B match_extend_back_end_encodeBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm8B JB three_bytes_match_emit_encodeBlockAsm8B three_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm8B two_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm8B JMP memmove_long_match_emit_encodeBlockAsm8B one_byte_match_emit_encodeBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm8B memmove_long_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) - ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 + ADDQ $0x20, R12 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm8B: match_nolit_loop_encodeBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm8B matchlen_bsf_16match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match8_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm8B matchlen_bsf_8_match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match4_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm8B JB match_nolit_end_encodeBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm8B matchlen_match1_match_nolit_encodeBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm8B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm8B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B long_offset_short_match_nolit_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B two_byte_offset_short_match_nolit_encodeBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B emit_copy_three_match_nolit_encodeBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm8B emit_remainder_encodeBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm8B @@ -5853,26 +5858,26 @@ emit_remainder_ok_encodeBlockAsm8B: JB three_bytes_emit_remainder_encodeBlockAsm8B three_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm8B two_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm8B JMP memmove_long_emit_remainder_encodeBlockAsm8B one_byte_emit_remainder_encodeBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -5888,73 +5893,73 @@ memmove_emit_remainder_encodeBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm8B memmove_long_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -5968,961 +5973,962 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm(dst []byte, src []byte) int +// func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm check_maxskip_ok_encodeBetterBlockAsm: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm no_short_found_encodeBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm candidateS_match_encodeBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm match_extend_back_loop_encodeBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm JMP match_extend_back_loop_encodeBetterBlockAsm match_extend_back_end_encodeBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm matchlen_bsf_16match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match8_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm matchlen_bsf_8_match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match4_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm JB match_nolit_end_encodeBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm matchlen_match1_match_nolit_encodeBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm match_length_ok_encodeBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBetterBlockAsm four_bytes_match_emit_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm three_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm two_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm JMP memmove_long_match_emit_encodeBetterBlockAsm one_byte_match_emit_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm memmove_long_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm four_bytes_remain_match_nolit_encodeBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_match_nolit_encodeBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - MOVL DI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + MOVL R8, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm long_offset_short_match_nolit_encodeBetterBlockAsm: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_short_match_nolit_encodeBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm emit_copy_three_match_nolit_encodeBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm match_is_repeat_encodeBetterBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_repeat_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm four_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm three_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm two_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm one_byte_match_emit_repeat_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm memmove_long_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_repeat_encodeBetterBlockAsm - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm repeat_five_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_repeat_encodeBetterBlockAsm: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - CMPQ AX, (SP) - JB match_nolit_dst_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) - RET - -match_nolit_dst_ok_encodeBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + CMPQ CX, (SP) + JB match_nolit_dst_ok_encodeBetterBlockAsm + MOVQ $0x00000000, ret+56(FP) + RET + +match_nolit_dst_ok_encodeBetterBlockAsm: + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm @@ -6932,41 +6938,41 @@ emit_remainder_ok_encodeBetterBlockAsm: JB three_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm four_bytes_emit_remainder_encodeBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm three_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm two_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm JMP memmove_long_emit_remainder_encodeBetterBlockAsm one_byte_emit_remainder_encodeBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -6982,73 +6988,73 @@ memmove_emit_remainder_encodeBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm memmove_long_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -7062,903 +7068,904 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +// func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm4MB - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm4MB check_maxskip_ok_encodeBetterBlockAsm4MB: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm4MB: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm4MB - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm4MB no_short_found_encodeBetterBlockAsm4MB: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm4MB candidateS_match_encodeBetterBlockAsm4MB: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm4MB - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm4MB match_extend_back_loop_encodeBetterBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm4MB JMP match_extend_back_loop_encodeBetterBlockAsm4MB match_extend_back_end_encodeBetterBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm4MB: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB JB match_nolit_end_encodeBetterBlockAsm4MB - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm4MB matchlen_match1_match_nolit_encodeBetterBlockAsm4MB: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm4MB: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm4MB - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm4MB - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm4MB match_length_ok_encodeBetterBlockAsm4MB: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB three_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB two_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_encodeBetterBlockAsm4MB one_byte_match_emit_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB memmove_long_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm4MB - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB long_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB emit_copy_three_match_nolit_encodeBetterBlockAsm4MB: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB match_is_repeat_encodeBetterBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm4MB: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm4MB: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm4MB - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm4MB emit_remainder_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm4MB @@ -7968,33 +7975,33 @@ emit_remainder_ok_encodeBetterBlockAsm4MB: JB three_bytes_emit_remainder_encodeBetterBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB three_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB two_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm4MB JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB one_byte_emit_remainder_encodeBetterBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8010,73 +8017,73 @@ memmove_emit_remainder_encodeBetterBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB memmove_long_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -8090,756 +8097,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm12B no_short_found_encodeBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm12B candidateS_match_encodeBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm12B match_extend_back_loop_encodeBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm12B JMP match_extend_back_loop_encodeBetterBlockAsm12B match_extend_back_end_encodeBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match8_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match4_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm12B JB match_nolit_end_encodeBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm12B matchlen_match1_match_nolit_encodeBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm12B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm12B JB three_bytes_match_emit_encodeBetterBlockAsm12B three_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm12B two_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm12B JMP memmove_long_match_emit_encodeBetterBlockAsm12B one_byte_match_emit_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B memmove_long_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm12B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B long_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B emit_copy_three_match_nolit_encodeBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B match_is_repeat_encodeBetterBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm12B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm12B three_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B two_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm12B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B one_byte_match_emit_repeat_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B memmove_long_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm12B @@ -8848,26 +8856,26 @@ emit_remainder_ok_encodeBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeBetterBlockAsm12B three_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B two_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B one_byte_emit_remainder_encodeBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8883,73 +8891,73 @@ memmove_emit_remainder_encodeBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B memmove_long_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -8963,756 +8971,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 - JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm10B no_short_found_encodeBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm10B candidateS_match_encodeBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm10B match_extend_back_loop_encodeBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm10B JMP match_extend_back_loop_encodeBetterBlockAsm10B match_extend_back_end_encodeBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match8_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match4_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm10B JB match_nolit_end_encodeBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm10B matchlen_match1_match_nolit_encodeBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm10B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm10B JB three_bytes_match_emit_encodeBetterBlockAsm10B three_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm10B two_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm10B JMP memmove_long_match_emit_encodeBetterBlockAsm10B one_byte_match_emit_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B memmove_long_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm10B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B long_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B emit_copy_three_match_nolit_encodeBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B match_is_repeat_encodeBetterBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm10B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm10B three_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B two_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm10B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B one_byte_match_emit_repeat_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B memmove_long_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm10B emit_remainder_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm10B @@ -9721,26 +9730,26 @@ emit_remainder_ok_encodeBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeBetterBlockAsm10B three_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B two_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B one_byte_emit_remainder_encodeBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -9756,73 +9765,73 @@ memmove_emit_remainder_encodeBetterBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B memmove_long_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -9836,742 +9845,743 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm8B no_short_found_encodeBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm8B candidateS_match_encodeBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm8B match_extend_back_loop_encodeBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm8B JMP match_extend_back_loop_encodeBetterBlockAsm8B match_extend_back_end_encodeBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match8_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match4_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm8B JB match_nolit_end_encodeBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm8B matchlen_match1_match_nolit_encodeBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm8B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm8B JB three_bytes_match_emit_encodeBetterBlockAsm8B three_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm8B two_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm8B JMP memmove_long_match_emit_encodeBetterBlockAsm8B one_byte_match_emit_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B memmove_long_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm8B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B long_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B -repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX +repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B emit_copy_three_match_nolit_encodeBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B match_is_repeat_encodeBetterBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm8B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm8B three_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm8B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B one_byte_match_emit_repeat_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x04 + CMPQ R8, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ DI, $0x08 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R8), R9 - MOVL R9, (AX) + MOVL (R9), R10 + MOVL R10, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R8), R9 - MOVL -4(R8)(DI*1), R8 - MOVL R9, (AX) - MOVL R8, -4(AX)(DI*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (CX) + MOVL R9, -4(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B memmove_long_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R12 - SUBQ R9, R12 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R11 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R12*1), R9 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R10 ADDQ $0x20, R13 - ADDQ $0x20, R9 - ADDQ $0x20, R12 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R12*1), X4 - MOVOU -16(R8)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ DI, R12 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm8B emit_remainder_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm8B @@ -10580,26 +10590,26 @@ emit_remainder_ok_encodeBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeBetterBlockAsm8B three_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B two_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B one_byte_emit_remainder_encodeBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -10615,73 +10625,73 @@ memmove_emit_remainder_encodeBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B memmove_long_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -10695,798 +10705,799 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm repeat_extend_back_loop_encodeSnappyBlockAsm: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm repeat_extend_back_end_encodeSnappyBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm four_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVL BX, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL SI, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm three_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm two_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm JMP memmove_long_repeat_emit_encodeSnappyBlockAsm one_byte_repeat_emit_encodeSnappyBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm memmove_long_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match8_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match4_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm JB repeat_extend_forward_end_encodeSnappyBlockAsm - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match1_repeat_extend_encodeSnappyBlockAsm: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeSnappyBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeSnappyBlockAsm two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm no_repeat_found_encodeSnappyBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm candidate3_match_encodeSnappyBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm candidate2_match_encodeSnappyBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm match_extend_back_loop_encodeSnappyBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm JMP match_extend_back_loop_encodeSnappyBlockAsm match_extend_back_end_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm four_bytes_match_emit_encodeSnappyBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm three_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm two_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm JMP memmove_long_match_emit_encodeSnappyBlockAsm one_byte_match_emit_encodeSnappyBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm memmove_long_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm: match_nolit_loop_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match8_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match4_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm JB match_nolit_end_encodeSnappyBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm matchlen_match1_match_nolit_encodeSnappyBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm four_bytes_remain_match_nolit_encodeSnappyBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm two_byte_offset_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm emit_copy_three_match_nolit_encodeSnappyBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm emit_remainder_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm @@ -11496,41 +11507,41 @@ emit_remainder_ok_encodeSnappyBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm four_bytes_emit_remainder_encodeSnappyBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm three_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm two_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBlockAsm one_byte_emit_remainder_encodeSnappyBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -11546,73 +11557,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm memmove_long_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -11626,718 +11637,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm64K - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm64K repeat_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K repeat_extend_back_end_encodeSnappyBlockAsm64K: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm64K: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm64K JB three_bytes_repeat_emit_encodeSnappyBlockAsm64K three_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K two_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm64K JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K one_byte_repeat_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K memmove_long_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K JB repeat_extend_forward_end_encodeSnappyBlockAsm64K - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm64K: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm64K emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm64K: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm64K no_repeat_found_encodeSnappyBlockAsm64K: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm64K - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm64K - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm64K candidate3_match_encodeSnappyBlockAsm64K: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm64K candidate2_match_encodeSnappyBlockAsm64K: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm64K match_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm64K JMP match_extend_back_loop_encodeSnappyBlockAsm64K match_extend_back_end_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm64K: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm64K - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm64K JB three_bytes_match_emit_encodeSnappyBlockAsm64K three_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm64K two_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBlockAsm64K one_byte_match_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm64K: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K memmove_long_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm64K: match_nolit_loop_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K JB match_nolit_end_encodeSnappyBlockAsm64K - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBlockAsm64K: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm64K - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm64K: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm64K: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBlockAsm64K: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm64K: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm64K - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm64K emit_remainder_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm64K @@ -12346,26 +12358,26 @@ emit_remainder_ok_encodeSnappyBlockAsm64K: JB three_bytes_emit_remainder_encodeSnappyBlockAsm64K three_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K two_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K one_byte_emit_remainder_encodeSnappyBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -12381,73 +12393,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K -emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) +emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K memmove_long_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -12461,718 +12473,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm12B repeat_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B repeat_extend_back_end_encodeSnappyBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm12B JB three_bytes_repeat_emit_encodeSnappyBlockAsm12B three_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B two_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm12B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B one_byte_repeat_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B memmove_long_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B JB repeat_extend_forward_end_encodeSnappyBlockAsm12B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm12B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm12B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm12B no_repeat_found_encodeSnappyBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm12B candidate3_match_encodeSnappyBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm12B candidate2_match_encodeSnappyBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm12B match_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm12B JMP match_extend_back_loop_encodeSnappyBlockAsm12B match_extend_back_end_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm12B JB three_bytes_match_emit_encodeSnappyBlockAsm12B three_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm12B two_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBlockAsm12B one_byte_match_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B memmove_long_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm12B: match_nolit_loop_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B JB match_nolit_end_encodeSnappyBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm12B emit_remainder_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm12B @@ -13181,26 +13194,26 @@ emit_remainder_ok_encodeSnappyBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm12B three_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B two_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B one_byte_emit_remainder_encodeSnappyBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -13216,73 +13229,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B memmove_long_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -13296,718 +13309,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm10B repeat_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B repeat_extend_back_end_encodeSnappyBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm10B JB three_bytes_repeat_emit_encodeSnappyBlockAsm10B three_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B two_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm10B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B one_byte_repeat_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B memmove_long_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B JB repeat_extend_forward_end_encodeSnappyBlockAsm10B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm10B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm10B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm10B no_repeat_found_encodeSnappyBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm10B candidate3_match_encodeSnappyBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm10B candidate2_match_encodeSnappyBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm10B match_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm10B JMP match_extend_back_loop_encodeSnappyBlockAsm10B match_extend_back_end_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm10B JB three_bytes_match_emit_encodeSnappyBlockAsm10B three_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm10B two_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBlockAsm10B one_byte_match_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B memmove_long_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm10B: match_nolit_loop_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B JB match_nolit_end_encodeSnappyBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm10B emit_remainder_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm10B @@ -14016,26 +14030,26 @@ emit_remainder_ok_encodeSnappyBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm10B three_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B two_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B one_byte_emit_remainder_encodeSnappyBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14051,73 +14065,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B memmove_long_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -14131,714 +14145,715 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm8B repeat_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B repeat_extend_back_end_encodeSnappyBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm8B JB three_bytes_repeat_emit_encodeSnappyBlockAsm8B three_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B two_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm8B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B one_byte_repeat_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B memmove_long_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B JB repeat_extend_forward_end_encodeSnappyBlockAsm8B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm8B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm8B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm8B no_repeat_found_encodeSnappyBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm8B candidate3_match_encodeSnappyBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm8B candidate2_match_encodeSnappyBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm8B match_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm8B JMP match_extend_back_loop_encodeSnappyBlockAsm8B match_extend_back_end_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm8B JB three_bytes_match_emit_encodeSnappyBlockAsm8B three_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm8B two_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBlockAsm8B one_byte_match_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B memmove_long_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm8B: match_nolit_loop_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B JB match_nolit_end_encodeSnappyBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm8B emit_remainder_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm8B @@ -14847,26 +14862,26 @@ emit_remainder_ok_encodeSnappyBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm8B three_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B two_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B one_byte_emit_remainder_encodeSnappyBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14882,73 +14897,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B memmove_long_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -14962,520 +14977,521 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeSnappyBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeSnappyBetterBlockAsm check_maxskip_ok_encodeSnappyBetterBlockAsm: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeSnappyBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm no_short_found_encodeSnappyBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm candidateS_match_encodeSnappyBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm match_extend_back_loop_encodeSnappyBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm JMP match_extend_back_loop_encodeSnappyBetterBlockAsm match_extend_back_end_encodeSnappyBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm JB match_nolit_end_encodeSnappyBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeSnappyBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeSnappyBetterBlockAsm match_length_ok_encodeSnappyBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm four_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm three_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm two_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm one_byte_match_emit_encodeSnappyBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm memmove_long_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm @@ -15485,41 +15501,41 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm four_bytes_emit_remainder_encodeSnappyBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm three_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm two_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm one_byte_emit_remainder_encodeSnappyBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -15535,73 +15551,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm memmove_long_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -15615,463 +15631,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_ba JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000900, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 262168(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 262168(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL 262144(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 262144(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm64K - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm64K no_short_found_encodeSnappyBetterBlockAsm64K: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm64K candidateS_match_encodeSnappyBetterBlockAsm64K: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K match_extend_back_loop_encodeSnappyBetterBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K match_extend_back_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K JB match_nolit_end_encodeSnappyBetterBlockAsm64K - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm64K - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm64K JB three_bytes_match_emit_encodeSnappyBetterBlockAsm64K three_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K two_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K one_byte_match_emit_encodeSnappyBetterBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K memmove_long_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + +emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x30, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 262168(SP)(R10*4) - MOVL R13, 262168(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x30, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x33, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x30, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x33, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 262144(AX)(R11*4) + MOVL R14, 262144(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm64K: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x30, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x30, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm64K emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K @@ -16080,26 +16097,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm64K: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16115,73 +16132,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -16195,463 +16212,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm12B no_short_found_encodeSnappyBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm12B candidateS_match_encodeSnappyBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B match_extend_back_loop_encodeSnappyBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B match_extend_back_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B JB match_nolit_end_encodeSnappyBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm12B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm12B three_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B two_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B one_byte_match_emit_encodeSnappyBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B memmove_long_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm12B emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B @@ -16660,26 +16678,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16695,73 +16713,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) - JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B - -emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) + JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B + +emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -16775,463 +16793,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm10B no_short_found_encodeSnappyBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm10B candidateS_match_encodeSnappyBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B match_extend_back_loop_encodeSnappyBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B match_extend_back_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B JB match_nolit_end_encodeSnappyBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm10B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm10B three_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B two_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B one_byte_match_emit_encodeSnappyBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B memmove_long_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm10B emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B @@ -17240,26 +17259,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17275,73 +17294,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -17355,461 +17374,462 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm8B no_short_found_encodeSnappyBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm8B candidateS_match_encodeSnappyBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B match_extend_back_loop_encodeSnappyBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B match_extend_back_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 - JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 + JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B JB match_nolit_end_encodeSnappyBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm8B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm8B three_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B two_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B one_byte_match_emit_encodeSnappyBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B memmove_long_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm8B emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B @@ -17818,26 +17838,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17853,73 +17873,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -17933,1136 +17953,1142 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_ JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func calcBlockSize(src []byte) int +// func calcBlockSize(src []byte, tmp *[32768]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSize(SB), $32792-32 - XORQ AX, AX - MOVQ $0x00000100, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSize(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000100, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSize: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSize MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+0(FP), BX search_loop_calcBlockSize: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x33, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x33, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSize - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSize repeat_extend_back_loop_calcBlockSize: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSize - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSize - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSize repeat_extend_back_end_calcBlockSize: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSize: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSize - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_calcBlockSize - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_repeat_emit_calcBlockSize four_bytes_repeat_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_repeat_emit_calcBlockSize three_bytes_repeat_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSize two_bytes_repeat_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSize JMP memmove_long_repeat_emit_calcBlockSize one_byte_repeat_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSize memmove_long_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSize: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSize: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSize - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSize matchlen_bsf_16repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match8_repeat_extend_calcBlockSize: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSize matchlen_bsf_8_repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match4_repeat_extend_calcBlockSize: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSize - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSize - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSize: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSize JB repeat_extend_forward_end_calcBlockSize - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_calcBlockSize - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSize matchlen_match1_repeat_extend_calcBlockSize: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSize - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSize: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_calcBlockSize four_bytes_loop_back_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_calcBlockSize - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_calcBlockSize JMP four_bytes_loop_back_repeat_as_copy_calcBlockSize four_bytes_remain_repeat_as_copy_calcBlockSize: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP repeat_end_emit_calcBlockSize two_byte_offset_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSize - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSize two_byte_offset_short_repeat_as_copy_calcBlockSize: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSize - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSize emit_copy_three_repeat_as_copy_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSize: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSize no_repeat_found_calcBlockSize: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSize - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSize - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSize - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSize candidate3_match_calcBlockSize: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSize candidate2_match_calcBlockSize: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSize: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSize match_extend_back_loop_calcBlockSize: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSize - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSize - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSize JMP match_extend_back_loop_calcBlockSize match_extend_back_end_calcBlockSize: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSize: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSize - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSize - CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSize - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB three_bytes_match_emit_calcBlockSize - CMPL SI, $0x01000000 + CMPL DI, $0x01000000 JB four_bytes_match_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_match_emit_calcBlockSize four_bytes_match_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_match_emit_calcBlockSize three_bytes_match_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSize two_bytes_match_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSize JMP memmove_long_match_emit_calcBlockSize one_byte_match_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSize memmove_long_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSize: match_nolit_loop_calcBlockSize: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSize: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSize - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSize matchlen_bsf_16match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match8_match_nolit_calcBlockSize: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSize matchlen_bsf_8_match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match4_match_nolit_calcBlockSize: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSize - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSize - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSize: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_calcBlockSize JB match_nolit_end_calcBlockSize - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSize - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSize matchlen_match1_match_nolit_calcBlockSize: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSize - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSize: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_calcBlockSize four_bytes_loop_back_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_calcBlockSize - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_calcBlockSize JMP four_bytes_loop_back_match_nolit_calcBlockSize four_bytes_remain_match_nolit_calcBlockSize: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_calcBlockSize two_byte_offset_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSize - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSize two_byte_offset_short_match_nolit_calcBlockSize: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSize - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSize emit_copy_three_match_nolit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSize: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSize: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x33, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x33, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x33, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x33, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSize - INCL CX + INCL DX JMP search_loop_calcBlockSize emit_remainder_calcBlockSize: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSize: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSize - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSize - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x00010000 + CMPL AX, $0x00010000 JB three_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x01000000 + CMPL AX, $0x01000000 JB four_bytes_emit_remainder_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_emit_remainder_calcBlockSize four_bytes_emit_remainder_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_emit_remainder_calcBlockSize three_bytes_emit_remainder_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSize two_bytes_emit_remainder_calcBlockSize: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSize JMP memmove_long_emit_remainder_calcBlockSize one_byte_emit_remainder_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSize memmove_long_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX emit_literal_done_emit_remainder_calcBlockSize: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET -// func calcBlockSizeSmall(src []byte) int +// func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSizeSmall(SB), $2072-32 - XORQ AX, AX - MOVQ $0x00000010, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSizeSmall(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000010, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSizeSmall: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSizeSmall MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+0(FP), BX search_loop_calcBlockSizeSmall: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x37, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x37, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x37, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSizeSmall - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSizeSmall repeat_extend_back_loop_calcBlockSizeSmall: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSizeSmall - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSizeSmall repeat_extend_back_end_calcBlockSizeSmall: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSizeSmall: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSizeSmall - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_calcBlockSizeSmall - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSizeSmall JB three_bytes_repeat_emit_calcBlockSizeSmall three_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSizeSmall two_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSizeSmall JMP memmove_long_repeat_emit_calcBlockSizeSmall one_byte_repeat_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSizeSmall memmove_long_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSizeSmall: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSizeSmall - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSizeSmall matchlen_bsf_16repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_calcBlockSizeSmall matchlen_match8_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSizeSmall matchlen_bsf_8_repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSizeSmall matchlen_match4_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSizeSmall - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSizeSmall - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSizeSmall JB repeat_extend_forward_end_calcBlockSizeSmall - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_calcBlockSizeSmall - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSizeSmall matchlen_match1_repeat_extend_calcBlockSizeSmall: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSizeSmall - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSizeSmall: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_calcBlockSizeSmall: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSizeSmall two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall: - MOVL BX, SI - SHLL $0x02, SI - CMPL BX, $0x0c + MOVL SI, DI + SHLL $0x02, DI + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSizeSmall emit_copy_three_repeat_as_copy_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSizeSmall: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSizeSmall no_repeat_found_calcBlockSizeSmall: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSizeSmall - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSizeSmall - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSizeSmall - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSizeSmall candidate3_match_calcBlockSizeSmall: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSizeSmall candidate2_match_calcBlockSizeSmall: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSizeSmall: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSizeSmall match_extend_back_loop_calcBlockSizeSmall: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSizeSmall - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSizeSmall JMP match_extend_back_loop_calcBlockSizeSmall match_extend_back_end_calcBlockSizeSmall: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSizeSmall: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSizeSmall - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSizeSmall - CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSizeSmall JB three_bytes_match_emit_calcBlockSizeSmall three_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSizeSmall two_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSizeSmall JMP memmove_long_match_emit_calcBlockSizeSmall one_byte_match_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSizeSmall memmove_long_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSizeSmall: match_nolit_loop_calcBlockSizeSmall: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSizeSmall - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSizeSmall matchlen_bsf_16match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match8_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSizeSmall matchlen_bsf_8_match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match4_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSizeSmall - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSizeSmall - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_calcBlockSizeSmall JB match_nolit_end_calcBlockSizeSmall - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSizeSmall - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSizeSmall matchlen_match1_match_nolit_calcBlockSizeSmall: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSizeSmall - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSizeSmall: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_calcBlockSizeSmall: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSizeSmall - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSizeSmall two_byte_offset_short_match_nolit_calcBlockSizeSmall: - MOVL R9, BX - SHLL $0x02, BX - CMPL R9, $0x0c + MOVL R10, SI + SHLL $0x02, SI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSizeSmall emit_copy_three_match_nolit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSizeSmall: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSizeSmall: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x37, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x37, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x37, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x37, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSizeSmall - INCL CX + INCL DX JMP search_loop_calcBlockSizeSmall emit_remainder_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSizeSmall - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSizeSmall - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSizeSmall JB three_bytes_emit_remainder_calcBlockSizeSmall three_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSizeSmall two_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSizeSmall JMP memmove_long_emit_remainder_calcBlockSizeSmall one_byte_emit_remainder_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSizeSmall memmove_long_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX emit_literal_done_emit_remainder_calcBlockSizeSmall: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET // func emitLiteral(dst []byte, lit []byte) int @@ -19783,7 +19809,7 @@ TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4_s2_loop: @@ -20266,7 +20292,7 @@ TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4s_s2_loop: @@ -20751,7 +20777,7 @@ TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4_snappy_loop: CMPQ DX, BX @@ -21017,7 +21043,7 @@ TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4s_snappy_loop: CMPQ DX, BX diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index a79c4a527c6..8f8223cd3a6 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -6,6 +6,7 @@ package zstd import ( "crypto/rand" + "errors" "fmt" "io" "math" @@ -149,6 +150,9 @@ func (e *Encoder) ResetContentSize(w io.Writer, size int64) { // and write CRC if requested. func (e *Encoder) Write(p []byte) (n int, err error) { s := &e.state + if s.eofWritten { + return 0, ErrEncoderClosed + } for len(p) > 0 { if len(p)+len(s.filling) < e.o.blockSize { if e.o.crc { @@ -288,6 +292,9 @@ func (e *Encoder) nextBlock(final bool) error { s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current s.nInput += int64(len(s.current)) s.wg.Add(1) + if final { + s.eofWritten = true + } go func(src []byte) { if debugEncoder { println("Adding block,", len(src), "bytes, final:", final) @@ -303,9 +310,6 @@ func (e *Encoder) nextBlock(final bool) error { blk := enc.Block() enc.Encode(blk, src) blk.last = final - if final { - s.eofWritten = true - } // Wait for pending writes. s.wWg.Wait() if s.writeErr != nil { @@ -401,12 +405,20 @@ func (e *Encoder) Flush() error { if len(s.filling) > 0 { err := e.nextBlock(false) if err != nil { + // Ignore Flush after Close. + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } } s.wg.Wait() s.wWg.Wait() if s.err != nil { + // Ignore Flush after Close. + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return s.err } return s.writeErr @@ -422,6 +434,9 @@ func (e *Encoder) Close() error { } err := e.nextBlock(true) if err != nil { + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } if s.frameContentSize > 0 { @@ -459,6 +474,11 @@ func (e *Encoder) Close() error { } _, s.err = s.w.Write(frame) } + if s.err == nil { + s.err = ErrEncoderClosed + return nil + } + return s.err } diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 4be7cc73671..066bef2a4f0 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -88,6 +88,10 @@ var ( // Close has been called. ErrDecoderClosed = errors.New("decoder used after Close") + // ErrEncoderClosed will be returned if the Encoder was used after + // Close has been called. + ErrEncoderClosed = errors.New("encoder used after Close") + // ErrDecoderNilInput is returned when a nil Reader was provided // and an operation other than Reset/DecodeAll/Close was attempted. ErrDecoderNilInput = errors.New("nil input provided as reader") diff --git a/vendor/github.com/prometheus/client_golang/prometheus/testutil/testutil.go b/vendor/github.com/prometheus/client_golang/prometheus/testutil/testutil.go index e0ac3466654..6f1200180a7 100644 --- a/vendor/github.com/prometheus/client_golang/prometheus/testutil/testutil.go +++ b/vendor/github.com/prometheus/client_golang/prometheus/testutil/testutil.go @@ -158,6 +158,9 @@ func GatherAndCount(g prometheus.Gatherer, metricNames ...string) (int, error) { // ScrapeAndCompare calls a remote exporter's endpoint which is expected to return some metrics in // plain text format. Then it compares it with the results that the `expected` would return. // If the `metricNames` is not empty it would filter the comparison only to the given metric names. +// +// NOTE: Be mindful of accidental discrepancies between expected and metricNames; metricNames filter +// both expected and scraped metrics. See https://github.com/prometheus/client_golang/issues/1351. func ScrapeAndCompare(url string, expected io.Reader, metricNames ...string) error { resp, err := http.Get(url) if err != nil { @@ -185,6 +188,9 @@ func ScrapeAndCompare(url string, expected io.Reader, metricNames ...string) err // CollectAndCompare collects the metrics identified by `metricNames` and compares them in the Prometheus text // exposition format to the data read from expected. +// +// NOTE: Be mindful of accidental discrepancies between expected and metricNames; metricNames filter +// both expected and collected metrics. See https://github.com/prometheus/client_golang/issues/1351. func CollectAndCompare(c prometheus.Collector, expected io.Reader, metricNames ...string) error { reg := prometheus.NewPedanticRegistry() if err := reg.Register(c); err != nil { @@ -197,6 +203,9 @@ func CollectAndCompare(c prometheus.Collector, expected io.Reader, metricNames . // it to an expected output read from the provided Reader in the Prometheus text // exposition format. If any metricNames are provided, only metrics with those // names are compared. +// +// NOTE: Be mindful of accidental discrepancies between expected and metricNames; metricNames filter +// both expected and gathered metrics. See https://github.com/prometheus/client_golang/issues/1351. func GatherAndCompare(g prometheus.Gatherer, expected io.Reader, metricNames ...string) error { return TransactionalGatherAndCompare(prometheus.ToTransactionalGatherer(g), expected, metricNames...) } @@ -205,6 +214,9 @@ func GatherAndCompare(g prometheus.Gatherer, expected io.Reader, metricNames ... // it to an expected output read from the provided Reader in the Prometheus text // exposition format. If any metricNames are provided, only metrics with those // names are compared. +// +// NOTE: Be mindful of accidental discrepancies between expected and metricNames; metricNames filter +// both expected and gathered metrics. See https://github.com/prometheus/client_golang/issues/1351. func TransactionalGatherAndCompare(g prometheus.TransactionalGatherer, expected io.Reader, metricNames ...string) error { got, done, err := g.Gather() defer done() @@ -277,15 +289,6 @@ func compareMetricFamilies(got, expected []*dto.MetricFamily, metricNames ...str if metricNames != nil { got = filterMetrics(got, metricNames) expected = filterMetrics(expected, metricNames) - if len(metricNames) > len(got) { - var missingMetricNames []string - for _, name := range metricNames { - if ok := hasMetricByName(got, name); !ok { - missingMetricNames = append(missingMetricNames, name) - } - } - return fmt.Errorf("expected metric name(s) not found: %v", missingMetricNames) - } } return compare(got, expected) @@ -327,12 +330,3 @@ func filterMetrics(metrics []*dto.MetricFamily, names []string) []*dto.MetricFam } return filtered } - -func hasMetricByName(metrics []*dto.MetricFamily, name string) bool { - for _, mf := range metrics { - if mf.GetName() == name { - return true - } - } - return false -} diff --git a/vendor/github.com/shirou/gopsutil/v4/common/env.go b/vendor/github.com/shirou/gopsutil/v4/common/env.go index 4acad1fd1e8..47e471c402f 100644 --- a/vendor/github.com/shirou/gopsutil/v4/common/env.go +++ b/vendor/github.com/shirou/gopsutil/v4/common/env.go @@ -12,13 +12,14 @@ type EnvKeyType string var EnvKey = EnvKeyType("env") const ( - HostProcEnvKey EnvKeyType = "HOST_PROC" - HostSysEnvKey EnvKeyType = "HOST_SYS" - HostEtcEnvKey EnvKeyType = "HOST_ETC" - HostVarEnvKey EnvKeyType = "HOST_VAR" - HostRunEnvKey EnvKeyType = "HOST_RUN" - HostDevEnvKey EnvKeyType = "HOST_DEV" - HostRootEnvKey EnvKeyType = "HOST_ROOT" + HostProcEnvKey EnvKeyType = "HOST_PROC" + HostSysEnvKey EnvKeyType = "HOST_SYS" + HostEtcEnvKey EnvKeyType = "HOST_ETC" + HostVarEnvKey EnvKeyType = "HOST_VAR" + HostRunEnvKey EnvKeyType = "HOST_RUN" + HostDevEnvKey EnvKeyType = "HOST_DEV" + HostRootEnvKey EnvKeyType = "HOST_ROOT" + HostProcMountinfo EnvKeyType = "HOST_PROC_MOUNTINFO" ) type EnvMap map[EnvKeyType]string diff --git a/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin.go b/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin.go index 79a458b8e21..b3e3a668de1 100644 --- a/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin.go +++ b/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin.go @@ -5,12 +5,15 @@ package cpu import ( "context" + "fmt" "strconv" "strings" + "unsafe" - "github.com/shoenig/go-m1cpu" "github.com/tklauser/go-sysconf" "golang.org/x/sys/unix" + + "github.com/shirou/gopsutil/v4/internal/common" ) // sys/resource.h @@ -23,6 +26,24 @@ const ( cpUStates = 5 ) +// mach/machine.h +const ( + cpuStateUser = 0 + cpuStateSystem = 1 + cpuStateIdle = 2 + cpuStateNice = 3 + cpuStateMax = 4 +) + +// mach/processor_info.h +const ( + processorCpuLoadInfo = 2 +) + +type hostCpuLoadInfoData struct { + cpuTicks [cpuStateMax]uint32 +} + // default value. from time.h var ClocksPerSec = float64(128) @@ -39,11 +60,17 @@ func Times(percpu bool) ([]TimesStat, error) { } func TimesWithContext(ctx context.Context, percpu bool) ([]TimesStat, error) { + lib, err := common.NewLibrary(common.System) + if err != nil { + return nil, err + } + defer lib.Close() + if percpu { - return perCPUTimes() + return perCPUTimes(lib) } - return allCPUTimes() + return allCPUTimes(lib) } // Returns only one CPUInfoStat on FreeBSD @@ -86,15 +113,9 @@ func InfoWithContext(ctx context.Context) ([]InfoStat, error) { c.CacheSize = int32(cacheSize) c.VendorID, _ = unix.Sysctl("machdep.cpu.vendor") - if m1cpu.IsAppleSilicon() { - c.Mhz = float64(m1cpu.PCoreHz() / 1_000_000) - } else { - // Use the rated frequency of the CPU. This is a static value and does not - // account for low power or Turbo Boost modes. - cpuFrequency, err := unix.SysctlUint64("hw.cpufrequency") - if err == nil { - c.Mhz = float64(cpuFrequency) / 1000000.0 - } + v, err := getFrequency() + if err == nil { + c.Mhz = v } return append(ret, c), nil @@ -115,3 +136,63 @@ func CountsWithContext(ctx context.Context, logical bool) (int, error) { return int(count), nil } + +func perCPUTimes(machLib *common.Library) ([]TimesStat, error) { + machHostSelf := common.GetFunc[common.MachHostSelfFunc](machLib, common.MachHostSelfSym) + machTaskSelf := common.GetFunc[common.MachTaskSelfFunc](machLib, common.MachTaskSelfSym) + hostProcessorInfo := common.GetFunc[common.HostProcessorInfoFunc](machLib, common.HostProcessorInfoSym) + vmDeallocate := common.GetFunc[common.VMDeallocateFunc](machLib, common.VMDeallocateSym) + + var count, ncpu uint32 + var cpuload *hostCpuLoadInfoData + + status := hostProcessorInfo(machHostSelf(), processorCpuLoadInfo, &ncpu, uintptr(unsafe.Pointer(&cpuload)), &count) + + if status != common.KERN_SUCCESS { + return nil, fmt.Errorf("host_processor_info error=%d", status) + } + + defer vmDeallocate(machTaskSelf(), uintptr(unsafe.Pointer(cpuload)), uintptr(ncpu)) + + ret := []TimesStat{} + loads := unsafe.Slice(cpuload, ncpu) + + for i := 0; i < int(ncpu); i++ { + c := TimesStat{ + CPU: fmt.Sprintf("cpu%d", i), + User: float64(loads[i].cpuTicks[cpuStateUser]) / ClocksPerSec, + System: float64(loads[i].cpuTicks[cpuStateSystem]) / ClocksPerSec, + Nice: float64(loads[i].cpuTicks[cpuStateNice]) / ClocksPerSec, + Idle: float64(loads[i].cpuTicks[cpuStateIdle]) / ClocksPerSec, + } + + ret = append(ret, c) + } + + return ret, nil +} + +func allCPUTimes(machLib *common.Library) ([]TimesStat, error) { + machHostSelf := common.GetFunc[common.MachHostSelfFunc](machLib, common.MachHostSelfSym) + hostStatistics := common.GetFunc[common.HostStatisticsFunc](machLib, common.HostStatisticsSym) + + var cpuload hostCpuLoadInfoData + count := uint32(cpuStateMax) + + status := hostStatistics(machHostSelf(), common.HOST_CPU_LOAD_INFO, + uintptr(unsafe.Pointer(&cpuload)), &count) + + if status != common.KERN_SUCCESS { + return nil, fmt.Errorf("host_statistics error=%d", status) + } + + c := TimesStat{ + CPU: "cpu-total", + User: float64(cpuload.cpuTicks[cpuStateUser]) / ClocksPerSec, + System: float64(cpuload.cpuTicks[cpuStateSystem]) / ClocksPerSec, + Nice: float64(cpuload.cpuTicks[cpuStateNice]) / ClocksPerSec, + Idle: float64(cpuload.cpuTicks[cpuStateIdle]) / ClocksPerSec, + } + + return []TimesStat{c}, nil +} diff --git a/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_arm64.go b/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_arm64.go new file mode 100644 index 00000000000..50318424390 --- /dev/null +++ b/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_arm64.go @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: BSD-3-Clause +//go:build darwin && arm64 + +package cpu + +import ( + "encoding/binary" + "fmt" + "unsafe" + + "github.com/shirou/gopsutil/v4/internal/common" +) + +// https://github.com/shoenig/go-m1cpu/blob/v0.1.6/cpu.go +func getFrequency() (float64, error) { + ioKit, err := common.NewLibrary(common.IOKit) + if err != nil { + return 0, err + } + defer ioKit.Close() + + coreFoundation, err := common.NewLibrary(common.CoreFoundation) + if err != nil { + return 0, err + } + defer coreFoundation.Close() + + ioServiceMatching := common.GetFunc[common.IOServiceMatchingFunc](ioKit, common.IOServiceMatchingSym) + ioServiceGetMatchingServices := common.GetFunc[common.IOServiceGetMatchingServicesFunc](ioKit, common.IOServiceGetMatchingServicesSym) + ioIteratorNext := common.GetFunc[common.IOIteratorNextFunc](ioKit, common.IOIteratorNextSym) + ioRegistryEntryGetName := common.GetFunc[common.IORegistryEntryGetNameFunc](ioKit, common.IORegistryEntryGetNameSym) + ioRegistryEntryCreateCFProperty := common.GetFunc[common.IORegistryEntryCreateCFPropertyFunc](ioKit, common.IORegistryEntryCreateCFPropertySym) + ioObjectRelease := common.GetFunc[common.IOObjectReleaseFunc](ioKit, common.IOObjectReleaseSym) + + cfStringCreateWithCString := common.GetFunc[common.CFStringCreateWithCStringFunc](coreFoundation, common.CFStringCreateWithCStringSym) + cfDataGetLength := common.GetFunc[common.CFDataGetLengthFunc](coreFoundation, common.CFDataGetLengthSym) + cfDataGetBytePtr := common.GetFunc[common.CFDataGetBytePtrFunc](coreFoundation, common.CFDataGetBytePtrSym) + cfRelease := common.GetFunc[common.CFReleaseFunc](coreFoundation, common.CFReleaseSym) + + matching := ioServiceMatching("AppleARMIODevice") + + var iterator uint32 + if status := ioServiceGetMatchingServices(common.KIOMainPortDefault, uintptr(matching), &iterator); status != common.KERN_SUCCESS { + return 0.0, fmt.Errorf("IOServiceGetMatchingServices error=%d", status) + } + defer ioObjectRelease(iterator) + + pCorekey := cfStringCreateWithCString(common.KCFAllocatorDefault, "voltage-states5-sram", common.KCFStringEncodingUTF8) + defer cfRelease(uintptr(pCorekey)) + + var pCoreHz uint32 + for { + service := ioIteratorNext(iterator) + if !(service > 0) { + break + } + + buf := make([]byte, 512) + ioRegistryEntryGetName(service, &buf[0]) + + if common.GoString(&buf[0]) == "pmgr" { + pCoreRef := ioRegistryEntryCreateCFProperty(service, uintptr(pCorekey), common.KCFAllocatorDefault, common.KNilOptions) + length := cfDataGetLength(uintptr(pCoreRef)) + data := cfDataGetBytePtr(uintptr(pCoreRef)) + + // composite uint32 from the byte array + buf := unsafe.Slice((*byte)(data), length) + + // combine the bytes into a uint32 value + b := buf[length-8 : length-4] + pCoreHz = binary.LittleEndian.Uint32(b) + ioObjectRelease(service) + break + } + + ioObjectRelease(service) + } + + return float64(pCoreHz / 1_000_000), nil +} diff --git a/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_cgo.go b/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_cgo.go deleted file mode 100644 index 3a02024c5be..00000000000 --- a/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_cgo.go +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-License-Identifier: BSD-3-Clause -//go:build darwin && cgo - -package cpu - -/* -#include -#include -#include -#include -#include -#include -#include -#if TARGET_OS_MAC -#include -#endif -#include -#include -*/ -import "C" - -import ( - "bytes" - "encoding/binary" - "fmt" - "unsafe" -) - -// these CPU times for darwin is borrowed from influxdb/telegraf. - -func perCPUTimes() ([]TimesStat, error) { - var ( - count C.mach_msg_type_number_t - cpuload *C.processor_cpu_load_info_data_t - ncpu C.natural_t - ) - - status := C.host_processor_info(C.host_t(C.mach_host_self()), - C.PROCESSOR_CPU_LOAD_INFO, - &ncpu, - (*C.processor_info_array_t)(unsafe.Pointer(&cpuload)), - &count) - - if status != C.KERN_SUCCESS { - return nil, fmt.Errorf("host_processor_info error=%d", status) - } - - // jump through some cgo casting hoops and ensure we properly free - // the memory that cpuload points to - target := C.vm_map_t(C.mach_task_self_) - address := C.vm_address_t(uintptr(unsafe.Pointer(cpuload))) - defer C.vm_deallocate(target, address, C.vm_size_t(ncpu)) - - // the body of struct processor_cpu_load_info - // aka processor_cpu_load_info_data_t - var cpu_ticks [C.CPU_STATE_MAX]uint32 - - // copy the cpuload array to a []byte buffer - // where we can binary.Read the data - size := int(ncpu) * binary.Size(cpu_ticks) - buf := (*[1 << 30]byte)(unsafe.Pointer(cpuload))[:size:size] - - bbuf := bytes.NewBuffer(buf) - - var ret []TimesStat - - for i := 0; i < int(ncpu); i++ { - err := binary.Read(bbuf, binary.LittleEndian, &cpu_ticks) - if err != nil { - return nil, err - } - - c := TimesStat{ - CPU: fmt.Sprintf("cpu%d", i), - User: float64(cpu_ticks[C.CPU_STATE_USER]) / ClocksPerSec, - System: float64(cpu_ticks[C.CPU_STATE_SYSTEM]) / ClocksPerSec, - Nice: float64(cpu_ticks[C.CPU_STATE_NICE]) / ClocksPerSec, - Idle: float64(cpu_ticks[C.CPU_STATE_IDLE]) / ClocksPerSec, - } - - ret = append(ret, c) - } - - return ret, nil -} - -func allCPUTimes() ([]TimesStat, error) { - var count C.mach_msg_type_number_t - var cpuload C.host_cpu_load_info_data_t - - count = C.HOST_CPU_LOAD_INFO_COUNT - - status := C.host_statistics(C.host_t(C.mach_host_self()), - C.HOST_CPU_LOAD_INFO, - C.host_info_t(unsafe.Pointer(&cpuload)), - &count) - - if status != C.KERN_SUCCESS { - return nil, fmt.Errorf("host_statistics error=%d", status) - } - - c := TimesStat{ - CPU: "cpu-total", - User: float64(cpuload.cpu_ticks[C.CPU_STATE_USER]) / ClocksPerSec, - System: float64(cpuload.cpu_ticks[C.CPU_STATE_SYSTEM]) / ClocksPerSec, - Nice: float64(cpuload.cpu_ticks[C.CPU_STATE_NICE]) / ClocksPerSec, - Idle: float64(cpuload.cpu_ticks[C.CPU_STATE_IDLE]) / ClocksPerSec, - } - - return []TimesStat{c}, nil -} diff --git a/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_fallback.go b/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_fallback.go new file mode 100644 index 00000000000..b9e52aba176 --- /dev/null +++ b/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_fallback.go @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: BSD-3-Clause +//go:build darwin && !arm64 + +package cpu + +import "golang.org/x/sys/unix" + +func getFrequency() (float64, error) { + // Use the rated frequency of the CPU. This is a static value and does not + // account for low power or Turbo Boost modes. + cpuFrequency, err := unix.SysctlUint64("hw.cpufrequency") + return float64(cpuFrequency) / 1000000.0, err +} diff --git a/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_nocgo.go b/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_nocgo.go deleted file mode 100644 index 1af8566a67b..00000000000 --- a/vendor/github.com/shirou/gopsutil/v4/cpu/cpu_darwin_nocgo.go +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: BSD-3-Clause -//go:build darwin && !cgo - -package cpu - -import "github.com/shirou/gopsutil/v4/internal/common" - -func perCPUTimes() ([]TimesStat, error) { - return []TimesStat{}, common.ErrNotImplementedError -} - -func allCPUTimes() ([]TimesStat, error) { - return []TimesStat{}, common.ErrNotImplementedError -} diff --git a/vendor/github.com/shirou/gopsutil/v4/internal/common/common_darwin.go b/vendor/github.com/shirou/gopsutil/v4/internal/common/common_darwin.go index 53f9ae8d9a5..b473f88666e 100644 --- a/vendor/github.com/shirou/gopsutil/v4/internal/common/common_darwin.go +++ b/vendor/github.com/shirou/gopsutil/v4/internal/common/common_darwin.go @@ -5,11 +5,13 @@ package common import ( "context" + "fmt" "os" "os/exec" "strings" "unsafe" + "github.com/ebitengine/purego" "golang.org/x/sys/unix" ) @@ -64,3 +66,299 @@ func CallSyscall(mib []int32) ([]byte, uint64, error) { return buf, length, nil } + +// Library represents a dynamic library loaded by purego. +type Library struct { + addr uintptr + path string + close func() +} + +// library paths +const ( + IOKit = "/System/Library/Frameworks/IOKit.framework/IOKit" + CoreFoundation = "/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation" + System = "/usr/lib/libSystem.B.dylib" +) + +func NewLibrary(path string) (*Library, error) { + lib, err := purego.Dlopen(path, purego.RTLD_LAZY|purego.RTLD_GLOBAL) + if err != nil { + return nil, err + } + + closeFunc := func() { + purego.Dlclose(lib) + } + + return &Library{ + addr: lib, + path: path, + close: closeFunc, + }, nil +} + +func (lib *Library) Dlsym(symbol string) (uintptr, error) { + return purego.Dlsym(lib.addr, symbol) +} + +func GetFunc[T any](lib *Library, symbol string) T { + var fptr T + purego.RegisterLibFunc(&fptr, lib.addr, symbol) + return fptr +} + +func (lib *Library) Close() { + lib.close() +} + +// status codes +const ( + KERN_SUCCESS = 0 +) + +// IOKit functions and symbols. +type ( + IOServiceGetMatchingServiceFunc func(mainPort uint32, matching uintptr) uint32 + IOServiceGetMatchingServicesFunc func(mainPort uint32, matching uintptr, existing *uint32) int + IOServiceMatchingFunc func(name string) unsafe.Pointer + IOServiceOpenFunc func(service, owningTask, connType uint32, connect *uint32) int + IOServiceCloseFunc func(connect uint32) int + IOIteratorNextFunc func(iterator uint32) uint32 + IORegistryEntryGetNameFunc func(entry uint32, name *byte) int + IORegistryEntryGetParentEntryFunc func(entry uint32, plane string, parent *uint32) int + IORegistryEntryCreateCFPropertyFunc func(entry uint32, key, allocator uintptr, options uint32) unsafe.Pointer + IORegistryEntryCreateCFPropertiesFunc func(entry uint32, properties unsafe.Pointer, allocator uintptr, options uint32) int + IOObjectConformsToFunc func(object uint32, className string) bool + IOObjectReleaseFunc func(object uint32) int + IOConnectCallStructMethodFunc func(connection, selector uint32, inputStruct, inputStructCnt, outputStruct uintptr, outputStructCnt *uintptr) int + + IOHIDEventSystemClientCreateFunc func(allocator uintptr) unsafe.Pointer + IOHIDEventSystemClientSetMatchingFunc func(client, match uintptr) int + IOHIDServiceClientCopyEventFunc func(service uintptr, eventType int64, + options int32, timeout int64) unsafe.Pointer + IOHIDServiceClientCopyPropertyFunc func(service, property uintptr) unsafe.Pointer + IOHIDEventGetFloatValueFunc func(event uintptr, field int32) float64 + IOHIDEventSystemClientCopyServicesFunc func(client uintptr) unsafe.Pointer +) + +const ( + IOServiceGetMatchingServiceSym = "IOServiceGetMatchingService" + IOServiceGetMatchingServicesSym = "IOServiceGetMatchingServices" + IOServiceMatchingSym = "IOServiceMatching" + IOServiceOpenSym = "IOServiceOpen" + IOServiceCloseSym = "IOServiceClose" + IOIteratorNextSym = "IOIteratorNext" + IORegistryEntryGetNameSym = "IORegistryEntryGetName" + IORegistryEntryGetParentEntrySym = "IORegistryEntryGetParentEntry" + IORegistryEntryCreateCFPropertySym = "IORegistryEntryCreateCFProperty" + IORegistryEntryCreateCFPropertiesSym = "IORegistryEntryCreateCFProperties" + IOObjectConformsToSym = "IOObjectConformsTo" + IOObjectReleaseSym = "IOObjectRelease" + IOConnectCallStructMethodSym = "IOConnectCallStructMethod" + + IOHIDEventSystemClientCreateSym = "IOHIDEventSystemClientCreate" + IOHIDEventSystemClientSetMatchingSym = "IOHIDEventSystemClientSetMatching" + IOHIDServiceClientCopyEventSym = "IOHIDServiceClientCopyEvent" + IOHIDServiceClientCopyPropertySym = "IOHIDServiceClientCopyProperty" + IOHIDEventGetFloatValueSym = "IOHIDEventGetFloatValue" + IOHIDEventSystemClientCopyServicesSym = "IOHIDEventSystemClientCopyServices" +) + +const ( + KIOMainPortDefault = 0 + + KIOHIDEventTypeTemperature = 15 + + KNilOptions = 0 +) + +const ( + KIOMediaWholeKey = "Media" + KIOServicePlane = "IOService" +) + +// CoreFoundation functions and symbols. +type ( + CFGetTypeIDFunc func(cf uintptr) int32 + CFNumberCreateFunc func(allocator uintptr, theType int32, valuePtr uintptr) unsafe.Pointer + CFNumberGetValueFunc func(num uintptr, theType int32, valuePtr uintptr) bool + CFDictionaryCreateFunc func(allocator uintptr, keys, values *unsafe.Pointer, numValues int32, + keyCallBacks, valueCallBacks uintptr) unsafe.Pointer + CFDictionaryAddValueFunc func(theDict, key, value uintptr) + CFDictionaryGetValueFunc func(theDict, key uintptr) unsafe.Pointer + CFArrayGetCountFunc func(theArray uintptr) int32 + CFArrayGetValueAtIndexFunc func(theArray uintptr, index int32) unsafe.Pointer + CFStringCreateMutableFunc func(alloc uintptr, maxLength int32) unsafe.Pointer + CFStringGetLengthFunc func(theString uintptr) int32 + CFStringGetCStringFunc func(theString uintptr, buffer *byte, bufferSize int32, encoding uint32) + CFStringCreateWithCStringFunc func(alloc uintptr, cStr string, encoding uint32) unsafe.Pointer + CFDataGetLengthFunc func(theData uintptr) int32 + CFDataGetBytePtrFunc func(theData uintptr) unsafe.Pointer + CFReleaseFunc func(cf uintptr) +) + +const ( + CFGetTypeIDSym = "CFGetTypeID" + CFNumberCreateSym = "CFNumberCreate" + CFNumberGetValueSym = "CFNumberGetValue" + CFDictionaryCreateSym = "CFDictionaryCreate" + CFDictionaryAddValueSym = "CFDictionaryAddValue" + CFDictionaryGetValueSym = "CFDictionaryGetValue" + CFArrayGetCountSym = "CFArrayGetCount" + CFArrayGetValueAtIndexSym = "CFArrayGetValueAtIndex" + CFStringCreateMutableSym = "CFStringCreateMutable" + CFStringGetLengthSym = "CFStringGetLength" + CFStringGetCStringSym = "CFStringGetCString" + CFStringCreateWithCStringSym = "CFStringCreateWithCString" + CFDataGetLengthSym = "CFDataGetLength" + CFDataGetBytePtrSym = "CFDataGetBytePtr" + CFReleaseSym = "CFRelease" +) + +const ( + KCFStringEncodingUTF8 = 0x08000100 + KCFNumberSInt64Type = 4 + KCFNumberIntType = 9 + KCFAllocatorDefault = 0 +) + +// Kernel functions and symbols. +type MachTimeBaseInfo struct { + Numer uint32 + Denom uint32 +} + +type ( + HostProcessorInfoFunc func(host uint32, flavor int32, outProcessorCount *uint32, outProcessorInfo uintptr, + outProcessorInfoCnt *uint32) int + HostStatisticsFunc func(host uint32, flavor int32, hostInfoOut uintptr, hostInfoOutCnt *uint32) int + MachHostSelfFunc func() uint32 + MachTaskSelfFunc func() uint32 + MachTimeBaseInfoFunc func(info uintptr) int + VMDeallocateFunc func(targetTask uint32, vmAddress, vmSize uintptr) int +) + +const ( + HostProcessorInfoSym = "host_processor_info" + HostStatisticsSym = "host_statistics" + MachHostSelfSym = "mach_host_self" + MachTaskSelfSym = "mach_task_self" + MachTimeBaseInfoSym = "mach_timebase_info" + VMDeallocateSym = "vm_deallocate" +) + +const ( + CTL_KERN = 1 + KERN_ARGMAX = 8 + KERN_PROCARGS2 = 49 + + HOST_VM_INFO = 2 + HOST_CPU_LOAD_INFO = 3 + + HOST_VM_INFO_COUNT = 0xf +) + +// System functions and symbols. +type ( + ProcPidPathFunc func(pid int32, buffer uintptr, bufferSize uint32) int32 + ProcPidInfoFunc func(pid, flavor int32, arg uint64, buffer uintptr, bufferSize int32) int32 +) + +const ( + SysctlSym = "sysctl" + ProcPidPathSym = "proc_pidpath" + ProcPidInfoSym = "proc_pidinfo" +) + +const ( + MAXPATHLEN = 1024 + PROC_PIDPATHINFO_MAXSIZE = 4 * MAXPATHLEN + PROC_PIDTASKINFO = 4 + PROC_PIDVNODEPATHINFO = 9 +) + +// SMC represents a SMC instance. +type SMC struct { + lib *Library + conn uint32 + callStruct IOConnectCallStructMethodFunc +} + +const ioServiceSMC = "AppleSMC" + +const ( + KSMCUserClientOpen = 0 + KSMCUserClientClose = 1 + KSMCHandleYPCEvent = 2 + KSMCReadKey = 5 + KSMCWriteKey = 6 + KSMCGetKeyCount = 7 + KSMCGetKeyFromIndex = 8 + KSMCGetKeyInfo = 9 +) + +const ( + KSMCSuccess = 0 + KSMCError = 1 + KSMCKeyNotFound = 132 +) + +func NewSMC(ioKit *Library) (*SMC, error) { + if ioKit.path != IOKit { + return nil, fmt.Errorf("library is not IOKit") + } + + ioServiceGetMatchingService := GetFunc[IOServiceGetMatchingServiceFunc](ioKit, IOServiceGetMatchingServiceSym) + ioServiceMatching := GetFunc[IOServiceMatchingFunc](ioKit, IOServiceMatchingSym) + ioServiceOpen := GetFunc[IOServiceOpenFunc](ioKit, IOServiceOpenSym) + ioObjectRelease := GetFunc[IOObjectReleaseFunc](ioKit, IOObjectReleaseSym) + machTaskSelf := GetFunc[MachTaskSelfFunc](ioKit, MachTaskSelfSym) + + ioConnectCallStructMethod := GetFunc[IOConnectCallStructMethodFunc](ioKit, IOConnectCallStructMethodSym) + + service := ioServiceGetMatchingService(0, uintptr(ioServiceMatching(ioServiceSMC))) + if service == 0 { + return nil, fmt.Errorf("ERROR: %s NOT FOUND", ioServiceSMC) + } + + var conn uint32 + if result := ioServiceOpen(service, machTaskSelf(), 0, &conn); result != 0 { + return nil, fmt.Errorf("ERROR: IOServiceOpen failed") + } + + ioObjectRelease(service) + return &SMC{ + lib: ioKit, + conn: conn, + callStruct: ioConnectCallStructMethod, + }, nil +} + +func (s *SMC) CallStruct(selector uint32, inputStruct, inputStructCnt, outputStruct uintptr, outputStructCnt *uintptr) int { + return s.callStruct(s.conn, selector, inputStruct, inputStructCnt, outputStruct, outputStructCnt) +} + +func (s *SMC) Close() error { + ioServiceClose := GetFunc[IOServiceCloseFunc](s.lib, IOServiceCloseSym) + + if result := ioServiceClose(s.conn); result != 0 { + return fmt.Errorf("ERROR: IOServiceClose failed") + } + return nil +} + +// https://github.com/ebitengine/purego/blob/main/internal/strings/strings.go#L26 +func GoString(cStr *byte) string { + if cStr == nil { + return "" + } + var length int + for { + if *(*byte)(unsafe.Add(unsafe.Pointer(cStr), uintptr(length))) == '\x00' { + break + } + length++ + } + return string(unsafe.Slice(cStr, length)) +} diff --git a/vendor/github.com/shirou/gopsutil/v4/internal/common/common_unix.go b/vendor/github.com/shirou/gopsutil/v4/internal/common/common_unix.go index 2715b890bef..c9f91b1698a 100644 --- a/vendor/github.com/shirou/gopsutil/v4/internal/common/common_unix.go +++ b/vendor/github.com/shirou/gopsutil/v4/internal/common/common_unix.go @@ -40,23 +40,3 @@ func CallLsofWithContext(ctx context.Context, invoke Invoker, pid int32, args .. } return ret, nil } - -func CallPgrepWithContext(ctx context.Context, invoke Invoker, pid int32) ([]int32, error) { - out, err := invoke.CommandWithContext(ctx, "pgrep", "-P", strconv.Itoa(int(pid))) - if err != nil { - return []int32{}, err - } - lines := strings.Split(string(out), "\n") - ret := make([]int32, 0, len(lines)) - for _, l := range lines { - if len(l) == 0 { - continue - } - i, err := strconv.ParseInt(l, 10, 32) - if err != nil { - continue - } - ret = append(ret, int32(i)) - } - return ret, nil -} diff --git a/vendor/github.com/shirou/gopsutil/v4/mem/mem_darwin.go b/vendor/github.com/shirou/gopsutil/v4/mem/mem_darwin.go index a33c5f125a2..a4c15f6915d 100644 --- a/vendor/github.com/shirou/gopsutil/v4/mem/mem_darwin.go +++ b/vendor/github.com/shirou/gopsutil/v4/mem/mem_darwin.go @@ -70,3 +70,61 @@ func SwapDevices() ([]*SwapDevice, error) { func SwapDevicesWithContext(ctx context.Context) ([]*SwapDevice, error) { return nil, common.ErrNotImplementedError } + +type vmStatisticsData struct { + freeCount uint32 + activeCount uint32 + inactiveCount uint32 + wireCount uint32 + _ [44]byte // Not used here +} + +// VirtualMemory returns VirtualmemoryStat. +func VirtualMemory() (*VirtualMemoryStat, error) { + return VirtualMemoryWithContext(context.Background()) +} + +func VirtualMemoryWithContext(ctx context.Context) (*VirtualMemoryStat, error) { + machLib, err := common.NewLibrary(common.System) + if err != nil { + return nil, err + } + defer machLib.Close() + + hostStatistics := common.GetFunc[common.HostStatisticsFunc](machLib, common.HostStatisticsSym) + machHostSelf := common.GetFunc[common.MachHostSelfFunc](machLib, common.MachHostSelfSym) + + count := uint32(common.HOST_VM_INFO_COUNT) + var vmstat vmStatisticsData + + status := hostStatistics(machHostSelf(), common.HOST_VM_INFO, + uintptr(unsafe.Pointer(&vmstat)), &count) + + if status != common.KERN_SUCCESS { + return nil, fmt.Errorf("host_statistics error=%d", status) + } + + pageSizeAddr, _ := machLib.Dlsym("vm_kernel_page_size") + pageSize := **(**uint64)(unsafe.Pointer(&pageSizeAddr)) + total, err := getHwMemsize() + if err != nil { + return nil, err + } + totalCount := uint32(total / pageSize) + + availableCount := vmstat.inactiveCount + vmstat.freeCount + usedPercent := 100 * float64(totalCount-availableCount) / float64(totalCount) + + usedCount := totalCount - availableCount + + return &VirtualMemoryStat{ + Total: total, + Available: pageSize * uint64(availableCount), + Used: pageSize * uint64(usedCount), + UsedPercent: usedPercent, + Free: pageSize * uint64(vmstat.freeCount), + Active: pageSize * uint64(vmstat.activeCount), + Inactive: pageSize * uint64(vmstat.inactiveCount), + Wired: pageSize * uint64(vmstat.wireCount), + }, nil +} diff --git a/vendor/github.com/shirou/gopsutil/v4/mem/mem_darwin_cgo.go b/vendor/github.com/shirou/gopsutil/v4/mem/mem_darwin_cgo.go deleted file mode 100644 index cc6657d045c..00000000000 --- a/vendor/github.com/shirou/gopsutil/v4/mem/mem_darwin_cgo.go +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-License-Identifier: BSD-3-Clause -//go:build darwin && cgo - -package mem - -/* -#include -#include -*/ -import "C" - -import ( - "context" - "fmt" - "unsafe" -) - -// VirtualMemory returns VirtualmemoryStat. -func VirtualMemory() (*VirtualMemoryStat, error) { - return VirtualMemoryWithContext(context.Background()) -} - -func VirtualMemoryWithContext(ctx context.Context) (*VirtualMemoryStat, error) { - count := C.mach_msg_type_number_t(C.HOST_VM_INFO_COUNT) - var vmstat C.vm_statistics_data_t - - status := C.host_statistics(C.host_t(C.mach_host_self()), - C.HOST_VM_INFO, - C.host_info_t(unsafe.Pointer(&vmstat)), - &count) - - if status != C.KERN_SUCCESS { - return nil, fmt.Errorf("host_statistics error=%d", status) - } - - pageSize := uint64(C.vm_kernel_page_size) - total, err := getHwMemsize() - if err != nil { - return nil, err - } - totalCount := C.natural_t(total / pageSize) - - availableCount := vmstat.inactive_count + vmstat.free_count - usedPercent := 100 * float64(totalCount-availableCount) / float64(totalCount) - - usedCount := totalCount - availableCount - - return &VirtualMemoryStat{ - Total: total, - Available: pageSize * uint64(availableCount), - Used: pageSize * uint64(usedCount), - UsedPercent: usedPercent, - Free: pageSize * uint64(vmstat.free_count), - Active: pageSize * uint64(vmstat.active_count), - Inactive: pageSize * uint64(vmstat.inactive_count), - Wired: pageSize * uint64(vmstat.wire_count), - }, nil -} diff --git a/vendor/github.com/shirou/gopsutil/v4/mem/mem_darwin_nocgo.go b/vendor/github.com/shirou/gopsutil/v4/mem/mem_darwin_nocgo.go deleted file mode 100644 index 097a93e63e4..00000000000 --- a/vendor/github.com/shirou/gopsutil/v4/mem/mem_darwin_nocgo.go +++ /dev/null @@ -1,89 +0,0 @@ -// SPDX-License-Identifier: BSD-3-Clause -//go:build darwin && !cgo - -package mem - -import ( - "context" - "strconv" - "strings" - - "golang.org/x/sys/unix" -) - -// Runs vm_stat and returns Free and inactive pages -func getVMStat(vms *VirtualMemoryStat) error { - out, err := invoke.Command("vm_stat") - if err != nil { - return err - } - return parseVMStat(string(out), vms) -} - -func parseVMStat(out string, vms *VirtualMemoryStat) error { - var err error - - lines := strings.Split(out, "\n") - pagesize := uint64(unix.Getpagesize()) - for _, line := range lines { - fields := strings.Split(line, ":") - if len(fields) < 2 { - continue - } - key := strings.TrimSpace(fields[0]) - value := strings.Trim(fields[1], " .") - switch key { - case "Pages free": - free, e := strconv.ParseUint(value, 10, 64) - if e != nil { - err = e - } - vms.Free = free * pagesize - case "Pages inactive": - inactive, e := strconv.ParseUint(value, 10, 64) - if e != nil { - err = e - } - vms.Inactive = inactive * pagesize - case "Pages active": - active, e := strconv.ParseUint(value, 10, 64) - if e != nil { - err = e - } - vms.Active = active * pagesize - case "Pages wired down": - wired, e := strconv.ParseUint(value, 10, 64) - if e != nil { - err = e - } - vms.Wired = wired * pagesize - } - } - return err -} - -// VirtualMemory returns VirtualmemoryStat. -func VirtualMemory() (*VirtualMemoryStat, error) { - return VirtualMemoryWithContext(context.Background()) -} - -func VirtualMemoryWithContext(ctx context.Context) (*VirtualMemoryStat, error) { - ret := &VirtualMemoryStat{} - - total, err := getHwMemsize() - if err != nil { - return nil, err - } - err = getVMStat(ret) - if err != nil { - return nil, err - } - - ret.Available = ret.Free + ret.Inactive - ret.Total = total - - ret.Used = ret.Total - ret.Available - ret.UsedPercent = 100 * float64(ret.Used) / float64(ret.Total) - - return ret, nil -} diff --git a/vendor/github.com/shirou/gopsutil/v4/process/process.go b/vendor/github.com/shirou/gopsutil/v4/process/process.go index d73f1f972fa..70411c61644 100644 --- a/vendor/github.com/shirou/gopsutil/v4/process/process.go +++ b/vendor/github.com/shirou/gopsutil/v4/process/process.go @@ -18,7 +18,7 @@ import ( var ( invoke common.Invoker = common.Invoke{} - ErrorNoChildren = errors.New("process does not have children") + ErrorNoChildren = errors.New("process does not have children") // Deprecated: ErrorNoChildren is never returned by process.Children(), check its returned []*Process slice length instead ErrorProcessNotRunning = errors.New("process does not exist") ErrorNotPermitted = errors.New("operation not permitted") ) diff --git a/vendor/github.com/shirou/gopsutil/v4/process/process_darwin.go b/vendor/github.com/shirou/gopsutil/v4/process/process_darwin.go index 66b3684eae4..05c7562b767 100644 --- a/vendor/github.com/shirou/gopsutil/v4/process/process_darwin.go +++ b/vendor/github.com/shirou/gopsutil/v4/process/process_darwin.go @@ -4,15 +4,20 @@ package process import ( + "bytes" "context" + "encoding/binary" "fmt" "path/filepath" + "runtime" + "sort" "strconv" "strings" + "unsafe" - "github.com/tklauser/go-sysconf" "golang.org/x/sys/unix" + "github.com/shirou/gopsutil/v4/cpu" "github.com/shirou/gopsutil/v4/internal/common" "github.com/shirou/gopsutil/v4/net" ) @@ -27,16 +32,6 @@ const ( KernProcPathname = 12 // path to executable ) -var clockTicks = 100 // default value - -func init() { - clkTck, err := sysconf.Sysconf(sysconf.SC_CLK_TCK) - // ignore errors - if err == nil { - clockTicks = int(clkTck) - } -} - type _Ctype_struct___0 struct { Pad uint64 } @@ -186,65 +181,22 @@ func (p *Process) IOCountersWithContext(ctx context.Context) (*IOCountersStat, e return nil, common.ErrNotImplementedError } -func convertCPUTimes(s string) (ret float64, err error) { - var t int - var _tmp string - if strings.Contains(s, ":") { - _t := strings.Split(s, ":") - switch len(_t) { - case 3: - hour, err := strconv.ParseInt(_t[0], 10, 32) - if err != nil { - return ret, err - } - t += int(hour) * 60 * 60 * clockTicks - - mins, err := strconv.ParseInt(_t[1], 10, 32) - if err != nil { - return ret, err - } - t += int(mins) * 60 * clockTicks - _tmp = _t[2] - case 2: - mins, err := strconv.ParseInt(_t[0], 10, 32) - if err != nil { - return ret, err - } - t += int(mins) * 60 * clockTicks - _tmp = _t[1] - case 1, 0: - _tmp = s - default: - return ret, fmt.Errorf("wrong cpu time string") - } - } else { - _tmp = s - } - - _t := strings.Split(_tmp, ".") - if err != nil { - return ret, err - } - h, err := strconv.ParseInt(_t[0], 10, 32) - t += int(h) * clockTicks - h, err = strconv.ParseInt(_t[1], 10, 32) - t += int(h) - return float64(t) / float64(clockTicks), nil -} - func (p *Process) ChildrenWithContext(ctx context.Context) ([]*Process, error) { - pids, err := common.CallPgrepWithContext(ctx, invoke, p.Pid) + procs, err := ProcessesWithContext(ctx) if err != nil { - return nil, err + return nil, nil } - ret := make([]*Process, 0, len(pids)) - for _, pid := range pids { - np, err := NewProcessWithContext(ctx, pid) + ret := make([]*Process, 0, len(procs)) + for _, proc := range procs { + ppid, err := proc.PpidWithContext(ctx) if err != nil { - return nil, err + continue + } + if ppid == p.Pid { + ret = append(ret, proc) } - ret = append(ret, np) } + sort.Slice(ret, func(i, j int) bool { return ret[i].Pid < ret[j].Pid }) return ret, nil } @@ -323,3 +275,206 @@ func callPsWithContext(ctx context.Context, arg string, pid int32, threadOption return ret, nil } + +var ( + procPidPath common.ProcPidPathFunc + procPidInfo common.ProcPidInfoFunc + machTimeBaseInfo common.MachTimeBaseInfoFunc +) + +func registerFuncs() (*common.Library, error) { + lib, err := common.NewLibrary(common.System) + if err != nil { + return nil, err + } + + procPidPath = common.GetFunc[common.ProcPidPathFunc](lib, common.ProcPidPathSym) + procPidInfo = common.GetFunc[common.ProcPidInfoFunc](lib, common.ProcPidInfoSym) + machTimeBaseInfo = common.GetFunc[common.MachTimeBaseInfoFunc](lib, common.MachTimeBaseInfoSym) + + return lib, nil +} + +func getTimeScaleToNanoSeconds() float64 { + var timeBaseInfo common.MachTimeBaseInfo + + machTimeBaseInfo(uintptr(unsafe.Pointer(&timeBaseInfo))) + + return float64(timeBaseInfo.Numer) / float64(timeBaseInfo.Denom) +} + +func (p *Process) ExeWithContext(ctx context.Context) (string, error) { + lib, err := registerFuncs() + if err != nil { + return "", err + } + defer lib.Close() + + buf := make([]byte, common.PROC_PIDPATHINFO_MAXSIZE) + ret := procPidPath(p.Pid, uintptr(unsafe.Pointer(&buf[0])), common.PROC_PIDPATHINFO_MAXSIZE) + + if ret <= 0 { + return "", fmt.Errorf("unknown error: proc_pidpath returned %d", ret) + } + + return common.GoString(&buf[0]), nil +} + +// sys/proc_info.h +type vnodePathInfo struct { + _ [152]byte + vipPath [common.MAXPATHLEN]byte + _ [1176]byte +} + +// CwdWithContext retrieves the Current Working Directory for the given process. +// It uses the proc_pidinfo from libproc and will only work for processes the +// EUID can access. Otherwise "operation not permitted" will be returned as the +// error. +// Note: This might also work for other *BSD OSs. +func (p *Process) CwdWithContext(ctx context.Context) (string, error) { + lib, err := registerFuncs() + if err != nil { + return "", err + } + defer lib.Close() + + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + var vpi vnodePathInfo + const vpiSize = int32(unsafe.Sizeof(vpi)) + ret := procPidInfo(p.Pid, common.PROC_PIDVNODEPATHINFO, 0, uintptr(unsafe.Pointer(&vpi)), vpiSize) + errno, _ := lib.Dlsym("errno") + err = *(**unix.Errno)(unsafe.Pointer(&errno)) + if err == unix.EPERM { + return "", ErrorNotPermitted + } + + if ret <= 0 { + return "", fmt.Errorf("unknown error: proc_pidinfo returned %d", ret) + } + + if ret != vpiSize { + return "", fmt.Errorf("too few bytes; expected %d, got %d", vpiSize, ret) + } + return common.GoString(&vpi.vipPath[0]), nil +} + +func procArgs(pid int32) ([]byte, int, error) { + procargs, _, err := common.CallSyscall([]int32{common.CTL_KERN, common.KERN_PROCARGS2, pid}) + if err != nil { + return nil, 0, err + } + nargs := procargs[:4] + return procargs, int(binary.LittleEndian.Uint32(nargs)), nil +} + +func (p *Process) CmdlineSliceWithContext(ctx context.Context) ([]string, error) { + return p.cmdlineSliceWithContext(ctx, true) +} + +func (p *Process) cmdlineSliceWithContext(ctx context.Context, fallback bool) ([]string, error) { + pargs, nargs, err := procArgs(p.Pid) + if err != nil { + return nil, err + } + // The first bytes hold the nargs int, skip it. + args := bytes.Split((pargs)[unsafe.Sizeof(int(0)):], []byte{0}) + var argStr string + // The first element is the actual binary/command path. + // command := args[0] + var argSlice []string + // var envSlice []string + // All other, non-zero elements are arguments. The first "nargs" elements + // are the arguments. Everything else in the slice is then the environment + // of the process. + for _, arg := range args[1:] { + argStr = string(arg[:]) + if len(argStr) > 0 { + if nargs > 0 { + argSlice = append(argSlice, argStr) + nargs-- + continue + } + break + // envSlice = append(envSlice, argStr) + } + } + return argSlice, err +} + +// cmdNameWithContext returns the command name (including spaces) without any arguments +func (p *Process) cmdNameWithContext(ctx context.Context) (string, error) { + r, err := p.cmdlineSliceWithContext(ctx, false) + if err != nil { + return "", err + } + + if len(r) == 0 { + return "", nil + } + + return r[0], err +} + +func (p *Process) CmdlineWithContext(ctx context.Context) (string, error) { + r, err := p.CmdlineSliceWithContext(ctx) + if err != nil { + return "", err + } + return strings.Join(r, " "), err +} + +func (p *Process) NumThreadsWithContext(ctx context.Context) (int32, error) { + lib, err := registerFuncs() + if err != nil { + return 0, err + } + defer lib.Close() + + var ti ProcTaskInfo + const tiSize = int32(unsafe.Sizeof(ti)) + procPidInfo(p.Pid, common.PROC_PIDTASKINFO, 0, uintptr(unsafe.Pointer(&ti)), tiSize) + + return int32(ti.Threadnum), nil +} + +func (p *Process) TimesWithContext(ctx context.Context) (*cpu.TimesStat, error) { + lib, err := registerFuncs() + if err != nil { + return nil, err + } + defer lib.Close() + + var ti ProcTaskInfo + const tiSize = int32(unsafe.Sizeof(ti)) + procPidInfo(p.Pid, common.PROC_PIDTASKINFO, 0, uintptr(unsafe.Pointer(&ti)), tiSize) + + timescaleToNanoSeconds := getTimeScaleToNanoSeconds() + ret := &cpu.TimesStat{ + CPU: "cpu", + User: float64(ti.Total_user) * timescaleToNanoSeconds / 1e9, + System: float64(ti.Total_system) * timescaleToNanoSeconds / 1e9, + } + return ret, nil +} + +func (p *Process) MemoryInfoWithContext(ctx context.Context) (*MemoryInfoStat, error) { + lib, err := registerFuncs() + if err != nil { + return nil, err + } + defer lib.Close() + + var ti ProcTaskInfo + const tiSize = int32(unsafe.Sizeof(ti)) + procPidInfo(p.Pid, common.PROC_PIDTASKINFO, 0, uintptr(unsafe.Pointer(&ti)), tiSize) + + ret := &MemoryInfoStat{ + RSS: uint64(ti.Resident_size), + VMS: uint64(ti.Virtual_size), + Swap: uint64(ti.Pageins), + } + return ret, nil +} diff --git a/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_amd64.go b/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_amd64.go index a13522473a1..890a5d5331a 100644 --- a/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_amd64.go +++ b/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_amd64.go @@ -212,6 +212,27 @@ type Posix_cred struct { type Label struct{} +type ProcTaskInfo struct { + Virtual_size uint64 + Resident_size uint64 + Total_user uint64 + Total_system uint64 + Threads_user uint64 + Threads_system uint64 + Policy int32 + Faults int32 + Pageins int32 + Cow_faults int32 + Messages_sent int32 + Messages_received int32 + Syscalls_mach int32 + Syscalls_unix int32 + Csw int32 + Threadnum int32 + Numrunning int32 + Priority int32 +} + type AuditinfoAddr struct { Auid uint32 Mask AuMask diff --git a/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_arm64.go b/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_arm64.go index f1f3df365d9..8075cf227d1 100644 --- a/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_arm64.go +++ b/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_arm64.go @@ -190,6 +190,27 @@ type Posix_cred struct{} type Label struct{} +type ProcTaskInfo struct { + Virtual_size uint64 + Resident_size uint64 + Total_user uint64 + Total_system uint64 + Threads_user uint64 + Threads_system uint64 + Policy int32 + Faults int32 + Pageins int32 + Cow_faults int32 + Messages_sent int32 + Messages_received int32 + Syscalls_mach int32 + Syscalls_unix int32 + Csw int32 + Threadnum int32 + Numrunning int32 + Priority int32 +} + type AuditinfoAddr struct { Auid uint32 Mask AuMask diff --git a/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_cgo.go b/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_cgo.go deleted file mode 100644 index bbdfc963ebb..00000000000 --- a/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_cgo.go +++ /dev/null @@ -1,222 +0,0 @@ -// SPDX-License-Identifier: BSD-3-Clause -//go:build darwin && cgo - -package process - -// #include -// #include -// #include -// #include -// #include -// #include -// #include -import "C" - -import ( - "bytes" - "context" - "fmt" - "strings" - "syscall" - "unsafe" - - "github.com/shirou/gopsutil/v4/cpu" -) - -var ( - argMax int - timescaleToNanoSeconds float64 -) - -func init() { - argMax = getArgMax() - timescaleToNanoSeconds = getTimeScaleToNanoSeconds() -} - -func getArgMax() int { - var ( - mib = [...]C.int{C.CTL_KERN, C.KERN_ARGMAX} - argmax C.int - size C.size_t = C.ulong(unsafe.Sizeof(argmax)) - ) - retval := C.sysctl(&mib[0], 2, unsafe.Pointer(&argmax), &size, C.NULL, 0) - if retval == 0 { - return int(argmax) - } - return 0 -} - -func getTimeScaleToNanoSeconds() float64 { - var timeBaseInfo C.struct_mach_timebase_info - - C.mach_timebase_info(&timeBaseInfo) - - return float64(timeBaseInfo.numer) / float64(timeBaseInfo.denom) -} - -func (p *Process) ExeWithContext(ctx context.Context) (string, error) { - var c C.char // need a var for unsafe.Sizeof need a var - const bufsize = C.PROC_PIDPATHINFO_MAXSIZE * unsafe.Sizeof(c) - buffer := (*C.char)(C.malloc(C.size_t(bufsize))) - defer C.free(unsafe.Pointer(buffer)) - - ret, err := C.proc_pidpath(C.int(p.Pid), unsafe.Pointer(buffer), C.uint32_t(bufsize)) - if err != nil { - return "", err - } - if ret <= 0 { - return "", fmt.Errorf("unknown error: proc_pidpath returned %d", ret) - } - - return C.GoString(buffer), nil -} - -// CwdWithContext retrieves the Current Working Directory for the given process. -// It uses the proc_pidinfo from libproc and will only work for processes the -// EUID can access. Otherwise "operation not permitted" will be returned as the -// error. -// Note: This might also work for other *BSD OSs. -func (p *Process) CwdWithContext(ctx context.Context) (string, error) { - const vpiSize = C.sizeof_struct_proc_vnodepathinfo - vpi := (*C.struct_proc_vnodepathinfo)(C.malloc(vpiSize)) - defer C.free(unsafe.Pointer(vpi)) - ret, err := C.proc_pidinfo(C.int(p.Pid), C.PROC_PIDVNODEPATHINFO, 0, unsafe.Pointer(vpi), vpiSize) - if err != nil { - // fmt.Printf("ret: %d %T\n", ret, err) - if err == syscall.EPERM { - return "", ErrorNotPermitted - } - return "", err - } - if ret <= 0 { - return "", fmt.Errorf("unknown error: proc_pidinfo returned %d", ret) - } - if ret != C.sizeof_struct_proc_vnodepathinfo { - return "", fmt.Errorf("too few bytes; expected %d, got %d", vpiSize, ret) - } - return C.GoString(&vpi.pvi_cdir.vip_path[0]), err -} - -func procArgs(pid int32) ([]byte, int, error) { - var ( - mib = [...]C.int{C.CTL_KERN, C.KERN_PROCARGS2, C.int(pid)} - size C.size_t = C.ulong(argMax) - nargs C.int - result []byte - ) - procargs := (*C.char)(C.malloc(C.ulong(argMax))) - defer C.free(unsafe.Pointer(procargs)) - retval, err := C.sysctl(&mib[0], 3, unsafe.Pointer(procargs), &size, C.NULL, 0) - if retval == 0 { - C.memcpy(unsafe.Pointer(&nargs), unsafe.Pointer(procargs), C.sizeof_int) - result = C.GoBytes(unsafe.Pointer(procargs), C.int(size)) - // fmt.Printf("size: %d %d\n%s\n", size, nargs, hex.Dump(result)) - return result, int(nargs), nil - } - return nil, 0, err -} - -func (p *Process) CmdlineSliceWithContext(ctx context.Context) ([]string, error) { - return p.cmdlineSliceWithContext(ctx, true) -} - -func (p *Process) cmdlineSliceWithContext(ctx context.Context, fallback bool) ([]string, error) { - pargs, nargs, err := procArgs(p.Pid) - if err != nil { - return nil, err - } - // The first bytes hold the nargs int, skip it. - args := bytes.Split((pargs)[C.sizeof_int:], []byte{0}) - var argStr string - // The first element is the actual binary/command path. - // command := args[0] - var argSlice []string - // var envSlice []string - // All other, non-zero elements are arguments. The first "nargs" elements - // are the arguments. Everything else in the slice is then the environment - // of the process. - for _, arg := range args[1:] { - argStr = string(arg[:]) - if len(argStr) > 0 { - if nargs > 0 { - argSlice = append(argSlice, argStr) - nargs-- - continue - } - break - // envSlice = append(envSlice, argStr) - } - } - return argSlice, err -} - -// cmdNameWithContext returns the command name (including spaces) without any arguments -func (p *Process) cmdNameWithContext(ctx context.Context) (string, error) { - r, err := p.cmdlineSliceWithContext(ctx, false) - if err != nil { - return "", err - } - - if len(r) == 0 { - return "", nil - } - - return r[0], err -} - -func (p *Process) CmdlineWithContext(ctx context.Context) (string, error) { - r, err := p.CmdlineSliceWithContext(ctx) - if err != nil { - return "", err - } - return strings.Join(r, " "), err -} - -func (p *Process) NumThreadsWithContext(ctx context.Context) (int32, error) { - const tiSize = C.sizeof_struct_proc_taskinfo - ti := (*C.struct_proc_taskinfo)(C.malloc(tiSize)) - defer C.free(unsafe.Pointer(ti)) - - _, err := C.proc_pidinfo(C.int(p.Pid), C.PROC_PIDTASKINFO, 0, unsafe.Pointer(ti), tiSize) - if err != nil { - return 0, err - } - - return int32(ti.pti_threadnum), nil -} - -func (p *Process) TimesWithContext(ctx context.Context) (*cpu.TimesStat, error) { - const tiSize = C.sizeof_struct_proc_taskinfo - ti := (*C.struct_proc_taskinfo)(C.malloc(tiSize)) - defer C.free(unsafe.Pointer(ti)) - - _, err := C.proc_pidinfo(C.int(p.Pid), C.PROC_PIDTASKINFO, 0, unsafe.Pointer(ti), tiSize) - if err != nil { - return nil, err - } - - ret := &cpu.TimesStat{ - CPU: "cpu", - User: float64(ti.pti_total_user) * timescaleToNanoSeconds / 1e9, - System: float64(ti.pti_total_system) * timescaleToNanoSeconds / 1e9, - } - return ret, nil -} - -func (p *Process) MemoryInfoWithContext(ctx context.Context) (*MemoryInfoStat, error) { - const tiSize = C.sizeof_struct_proc_taskinfo - ti := (*C.struct_proc_taskinfo)(C.malloc(tiSize)) - defer C.free(unsafe.Pointer(ti)) - - _, err := C.proc_pidinfo(C.int(p.Pid), C.PROC_PIDTASKINFO, 0, unsafe.Pointer(ti), tiSize) - if err != nil { - return nil, err - } - - ret := &MemoryInfoStat{ - RSS: uint64(ti.pti_resident_size), - VMS: uint64(ti.pti_virtual_size), - Swap: uint64(ti.pti_pageins), - } - return ret, nil -} diff --git a/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_nocgo.go b/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_nocgo.go deleted file mode 100644 index d498c9377a0..00000000000 --- a/vendor/github.com/shirou/gopsutil/v4/process/process_darwin_nocgo.go +++ /dev/null @@ -1,134 +0,0 @@ -// SPDX-License-Identifier: BSD-3-Clause -//go:build darwin && !cgo - -package process - -import ( - "context" - "fmt" - "strconv" - "strings" - - "github.com/shirou/gopsutil/v4/cpu" - "github.com/shirou/gopsutil/v4/internal/common" -) - -func (p *Process) CwdWithContext(ctx context.Context) (string, error) { - return "", common.ErrNotImplementedError -} - -func (p *Process) ExeWithContext(ctx context.Context) (string, error) { - out, err := invoke.CommandWithContext(ctx, "lsof", "-p", strconv.Itoa(int(p.Pid)), "-Fpfn") - if err != nil { - return "", fmt.Errorf("bad call to lsof: %w", err) - } - txtFound := 0 - lines := strings.Split(string(out), "\n") - fallback := "" - for i := 1; i < len(lines); i++ { - if lines[i] == "ftxt" { - txtFound++ - if txtFound == 1 { - fallback = lines[i-1][1:] - } - if txtFound == 2 { - return lines[i-1][1:], nil - } - } - } - if fallback != "" { - return fallback, nil - } - return "", fmt.Errorf("missing txt data returned by lsof") -} - -func (p *Process) CmdlineWithContext(ctx context.Context) (string, error) { - r, err := callPsWithContext(ctx, "command", p.Pid, false, false) - if err != nil { - return "", err - } - return strings.Join(r[0], " "), err -} - -func (p *Process) cmdNameWithContext(ctx context.Context) (string, error) { - r, err := callPsWithContext(ctx, "command", p.Pid, false, true) - if err != nil { - return "", err - } - if len(r) > 0 && len(r[0]) > 0 { - return r[0][0], err - } - - return "", err -} - -// CmdlineSliceWithContext returns the command line arguments of the process as a slice with each -// element being an argument. Because of current deficiencies in the way that the command -// line arguments are found, single arguments that have spaces in the will actually be -// reported as two separate items. In order to do something better CGO would be needed -// to use the native darwin functions. -func (p *Process) CmdlineSliceWithContext(ctx context.Context) ([]string, error) { - r, err := callPsWithContext(ctx, "command", p.Pid, false, false) - if err != nil { - return nil, err - } - return r[0], err -} - -func (p *Process) NumThreadsWithContext(ctx context.Context) (int32, error) { - r, err := callPsWithContext(ctx, "utime,stime", p.Pid, true, false) - if err != nil { - return 0, err - } - return int32(len(r)), nil -} - -func (p *Process) TimesWithContext(ctx context.Context) (*cpu.TimesStat, error) { - r, err := callPsWithContext(ctx, "utime,stime", p.Pid, false, false) - if err != nil { - return nil, err - } - - utime, err := convertCPUTimes(r[0][0]) - if err != nil { - return nil, err - } - stime, err := convertCPUTimes(r[0][1]) - if err != nil { - return nil, err - } - - ret := &cpu.TimesStat{ - CPU: "cpu", - User: utime, - System: stime, - } - return ret, nil -} - -func (p *Process) MemoryInfoWithContext(ctx context.Context) (*MemoryInfoStat, error) { - r, err := callPsWithContext(ctx, "rss,vsize,pagein", p.Pid, false, false) - if err != nil { - return nil, err - } - rss, err := strconv.ParseInt(r[0][0], 10, 64) - if err != nil { - return nil, err - } - vms, err := strconv.ParseInt(r[0][1], 10, 64) - if err != nil { - return nil, err - } - pagein, err := strconv.ParseInt(r[0][2], 10, 64) - if err != nil { - return nil, err - } - - ret := &MemoryInfoStat{ - RSS: uint64(rss) * 1024, - VMS: uint64(vms) * 1024, - Swap: uint64(pagein), - } - - return ret, nil -} diff --git a/vendor/github.com/shirou/gopsutil/v4/process/process_freebsd.go b/vendor/github.com/shirou/gopsutil/v4/process/process_freebsd.go index 436dcf03005..76373736bfc 100644 --- a/vendor/github.com/shirou/gopsutil/v4/process/process_freebsd.go +++ b/vendor/github.com/shirou/gopsutil/v4/process/process_freebsd.go @@ -8,6 +8,7 @@ import ( "context" "errors" "path/filepath" + "sort" "strconv" "strings" @@ -269,18 +270,21 @@ func (p *Process) MemoryInfoWithContext(ctx context.Context) (*MemoryInfoStat, e } func (p *Process) ChildrenWithContext(ctx context.Context) ([]*Process, error) { - pids, err := common.CallPgrepWithContext(ctx, invoke, p.Pid) + procs, err := ProcessesWithContext(ctx) if err != nil { - return nil, err + return nil, nil } - ret := make([]*Process, 0, len(pids)) - for _, pid := range pids { - np, err := NewProcessWithContext(ctx, pid) + ret := make([]*Process, 0, len(procs)) + for _, proc := range procs { + ppid, err := proc.PpidWithContext(ctx) if err != nil { - return nil, err + continue + } + if ppid == p.Pid { + ret = append(ret, proc) } - ret = append(ret, np) } + sort.Slice(ret, func(i, j int) bool { return ret[i].Pid < ret[j].Pid }) return ret, nil } diff --git a/vendor/github.com/shirou/gopsutil/v4/process/process_linux.go b/vendor/github.com/shirou/gopsutil/v4/process/process_linux.go index 7aff0448dfe..68a8c88c4a9 100644 --- a/vendor/github.com/shirou/gopsutil/v4/process/process_linux.go +++ b/vendor/github.com/shirou/gopsutil/v4/process/process_linux.go @@ -12,6 +12,7 @@ import ( "math" "os" "path/filepath" + "sort" "strconv" "strings" @@ -338,21 +339,34 @@ func (p *Process) PageFaultsWithContext(ctx context.Context) (*PageFaultsStat, e } func (p *Process) ChildrenWithContext(ctx context.Context) ([]*Process, error) { - pids, err := common.CallPgrepWithContext(ctx, invoke, p.Pid) + statFiles, err := filepath.Glob(common.HostProcWithContext(ctx, "[0-9]*/stat")) if err != nil { return nil, err } - if len(pids) == 0 { - return nil, ErrorNoChildren - } - ret := make([]*Process, 0, len(pids)) - for _, pid := range pids { - np, err := NewProcessWithContext(ctx, pid) + ret := make([]*Process, 0, len(statFiles)) + for _, statFile := range statFiles { + statContents, err := os.ReadFile(statFile) if err != nil { - return nil, err + continue + } + fields := splitProcStat(statContents) + pid, err := strconv.ParseInt(fields[1], 10, 32) + if err != nil { + continue + } + ppid, err := strconv.ParseInt(fields[4], 10, 32) + if err != nil { + continue + } + if int32(ppid) == p.Pid { + np, err := NewProcessWithContext(ctx, int32(pid)) + if err != nil { + continue + } + ret = append(ret, np) } - ret = append(ret, np) } + sort.Slice(ret, func(i, j int) bool { return ret[i].Pid < ret[j].Pid }) return ret, nil } @@ -1082,8 +1096,7 @@ func (p *Process) fillFromTIDStatWithContext(ctx context.Context, tid int32) (ui if err != nil { return 0, 0, nil, 0, 0, 0, nil, err } - ctime := (t / uint64(clockTicks)) + uint64(bootTime) - createTime := int64(ctime * 1000) + createTime := int64((t * 1000 / uint64(clockTicks)) + uint64(bootTime*1000)) rtpriority, err := strconv.ParseInt(fields[18], 10, 32) if err != nil { diff --git a/vendor/github.com/shirou/gopsutil/v4/process/process_openbsd.go b/vendor/github.com/shirou/gopsutil/v4/process/process_openbsd.go index e2d0ab46226..5e8a9e0b45e 100644 --- a/vendor/github.com/shirou/gopsutil/v4/process/process_openbsd.go +++ b/vendor/github.com/shirou/gopsutil/v4/process/process_openbsd.go @@ -11,6 +11,7 @@ import ( "fmt" "io" "path/filepath" + "sort" "strconv" "strings" "unsafe" @@ -286,18 +287,21 @@ func (p *Process) MemoryInfoWithContext(ctx context.Context) (*MemoryInfoStat, e } func (p *Process) ChildrenWithContext(ctx context.Context) ([]*Process, error) { - pids, err := common.CallPgrepWithContext(ctx, invoke, p.Pid) + procs, err := ProcessesWithContext(ctx) if err != nil { - return nil, err + return nil, nil } - ret := make([]*Process, 0, len(pids)) - for _, pid := range pids { - np, err := NewProcessWithContext(ctx, pid) + ret := make([]*Process, 0, len(procs)) + for _, proc := range procs { + ppid, err := proc.PpidWithContext(ctx) if err != nil { - return nil, err + continue + } + if ppid == p.Pid { + ret = append(ret, proc) } - ret = append(ret, np) } + sort.Slice(ret, func(i, j int) bool { return ret[i].Pid < ret[j].Pid }) return ret, nil } diff --git a/vendor/github.com/shirou/gopsutil/v4/process/process_windows.go b/vendor/github.com/shirou/gopsutil/v4/process/process_windows.go index 52e1086f780..b00c671e9f3 100644 --- a/vendor/github.com/shirou/gopsutil/v4/process/process_windows.go +++ b/vendor/github.com/shirou/gopsutil/v4/process/process_windows.go @@ -43,6 +43,7 @@ var ( procGetPriorityClass = common.Modkernel32.NewProc("GetPriorityClass") procGetProcessIoCounters = common.Modkernel32.NewProc("GetProcessIoCounters") procGetNativeSystemInfo = common.Modkernel32.NewProc("GetNativeSystemInfo") + procGetProcessHandleCount = common.Modkernel32.NewProc("GetProcessHandleCount") processorArchitecture uint ) @@ -548,8 +549,21 @@ func (p *Process) NumCtxSwitchesWithContext(ctx context.Context) (*NumCtxSwitche return nil, common.ErrNotImplementedError } +// NumFDsWithContext returns the number of handles for a process on Windows, +// not the number of file descriptors (FDs). func (p *Process) NumFDsWithContext(ctx context.Context) (int32, error) { - return 0, common.ErrNotImplementedError + handle, err := windows.OpenProcess(processQueryInformation, false, uint32(p.Pid)) + if err != nil { + return 0, err + } + defer windows.CloseHandle(handle) + + var handleCount uint32 + ret, _, err := procGetProcessHandleCount.Call(uintptr(handle), uintptr(unsafe.Pointer(&handleCount))) + if ret == 0 { + return 0, err + } + return int32(handleCount), nil } func (p *Process) NumThreadsWithContext(ctx context.Context) (int32, error) { diff --git a/vendor/github.com/shoenig/go-m1cpu/.golangci.yaml b/vendor/github.com/shoenig/go-m1cpu/.golangci.yaml deleted file mode 100644 index dc6fefb979e..00000000000 --- a/vendor/github.com/shoenig/go-m1cpu/.golangci.yaml +++ /dev/null @@ -1,12 +0,0 @@ -run: - timeout: 5m -linters: - enable: - - gofmt - - errcheck - - errname - - errorlint - - bodyclose - - durationcheck - - whitespace - diff --git a/vendor/github.com/shoenig/go-m1cpu/LICENSE b/vendor/github.com/shoenig/go-m1cpu/LICENSE deleted file mode 100644 index e87a115e462..00000000000 --- a/vendor/github.com/shoenig/go-m1cpu/LICENSE +++ /dev/null @@ -1,363 +0,0 @@ -Mozilla Public License, version 2.0 - -1. Definitions - -1.1. "Contributor" - - means each individual or legal entity that creates, contributes to the - creation of, or owns Covered Software. - -1.2. "Contributor Version" - - means the combination of the Contributions of others (if any) used by a - Contributor and that particular Contributor's Contribution. - -1.3. "Contribution" - - means Covered Software of a particular Contributor. - -1.4. "Covered Software" - - means Source Code Form to which the initial Contributor has attached the - notice in Exhibit A, the Executable Form of such Source Code Form, and - Modifications of such Source Code Form, in each case including portions - thereof. - -1.5. "Incompatible With Secondary Licenses" - means - - a. that the initial Contributor has attached the notice described in - Exhibit B to the Covered Software; or - - b. that the Covered Software was made available under the terms of - version 1.1 or earlier of the License, but not also under the terms of - a Secondary License. - -1.6. "Executable Form" - - means any form of the work other than Source Code Form. - -1.7. "Larger Work" - - means a work that combines Covered Software with other material, in a - separate file or files, that is not Covered Software. - -1.8. "License" - - means this document. - -1.9. "Licensable" - - means having the right to grant, to the maximum extent possible, whether - at the time of the initial grant or subsequently, any and all of the - rights conveyed by this License. - -1.10. "Modifications" - - means any of the following: - - a. any file in Source Code Form that results from an addition to, - deletion from, or modification of the contents of Covered Software; or - - b. any new file in Source Code Form that contains any Covered Software. - -1.11. "Patent Claims" of a Contributor - - means any patent claim(s), including without limitation, method, - process, and apparatus claims, in any patent Licensable by such - Contributor that would be infringed, but for the grant of the License, - by the making, using, selling, offering for sale, having made, import, - or transfer of either its Contributions or its Contributor Version. - -1.12. "Secondary License" - - means either the GNU General Public License, Version 2.0, the GNU Lesser - General Public License, Version 2.1, the GNU Affero General Public - License, Version 3.0, or any later versions of those licenses. - -1.13. "Source Code Form" - - means the form of the work preferred for making modifications. - -1.14. "You" (or "Your") - - means an individual or a legal entity exercising rights under this - License. For legal entities, "You" includes any entity that controls, is - controlled by, or is under common control with You. For purposes of this - definition, "control" means (a) the power, direct or indirect, to cause - the direction or management of such entity, whether by contract or - otherwise, or (b) ownership of more than fifty percent (50%) of the - outstanding shares or beneficial ownership of such entity. - - -2. License Grants and Conditions - -2.1. Grants - - Each Contributor hereby grants You a world-wide, royalty-free, - non-exclusive license: - - a. under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or - as part of a Larger Work; and - - b. under Patent Claims of such Contributor to make, use, sell, offer for - sale, have made, import, and otherwise transfer either its - Contributions or its Contributor Version. - -2.2. Effective Date - - The licenses granted in Section 2.1 with respect to any Contribution - become effective for each Contribution on the date the Contributor first - distributes such Contribution. - -2.3. Limitations on Grant Scope - - The licenses granted in this Section 2 are the only rights granted under - this License. No additional rights or licenses will be implied from the - distribution or licensing of Covered Software under this License. - Notwithstanding Section 2.1(b) above, no patent license is granted by a - Contributor: - - a. for any code that a Contributor has removed from Covered Software; or - - b. for infringements caused by: (i) Your and any other third party's - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or - - c. under Patent Claims infringed by Covered Software in the absence of - its Contributions. - - This License does not grant any rights in the trademarks, service marks, - or logos of any Contributor (except as may be necessary to comply with - the notice requirements in Section 3.4). - -2.4. Subsequent Licenses - - No Contributor makes additional grants as a result of Your choice to - distribute the Covered Software under a subsequent version of this - License (see Section 10.2) or under the terms of a Secondary License (if - permitted under the terms of Section 3.3). - -2.5. Representation - - Each Contributor represents that the Contributor believes its - Contributions are its original creation(s) or it has sufficient rights to - grant the rights to its Contributions conveyed by this License. - -2.6. Fair Use - - This License is not intended to limit any rights You have under - applicable copyright doctrines of fair use, fair dealing, or other - equivalents. - -2.7. Conditions - - Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in - Section 2.1. - - -3. Responsibilities - -3.1. Distribution of Source Form - - All distribution of Covered Software in Source Code Form, including any - Modifications that You create or to which You contribute, must be under - the terms of this License. You must inform recipients that the Source - Code Form of the Covered Software is governed by the terms of this - License, and how they can obtain a copy of this License. You may not - attempt to alter or restrict the recipients' rights in the Source Code - Form. - -3.2. Distribution of Executable Form - - If You distribute Covered Software in Executable Form then: - - a. such Covered Software must also be made available in Source Code Form, - as described in Section 3.1, and You must inform recipients of the - Executable Form how they can obtain a copy of such Source Code Form by - reasonable means in a timely manner, at a charge no more than the cost - of distribution to the recipient; and - - b. You may distribute such Executable Form under the terms of this - License, or sublicense it under different terms, provided that the - license for the Executable Form does not attempt to limit or alter the - recipients' rights in the Source Code Form under this License. - -3.3. Distribution of a Larger Work - - You may create and distribute a Larger Work under terms of Your choice, - provided that You also comply with the requirements of this License for - the Covered Software. If the Larger Work is a combination of Covered - Software with a work governed by one or more Secondary Licenses, and the - Covered Software is not Incompatible With Secondary Licenses, this - License permits You to additionally distribute such Covered Software - under the terms of such Secondary License(s), so that the recipient of - the Larger Work may, at their option, further distribute the Covered - Software under the terms of either this License or such Secondary - License(s). - -3.4. Notices - - You may not remove or alter the substance of any license notices - (including copyright notices, patent notices, disclaimers of warranty, or - limitations of liability) contained within the Source Code Form of the - Covered Software, except that You may alter any license notices to the - extent required to remedy known factual inaccuracies. - -3.5. Application of Additional Terms - - You may choose to offer, and to charge a fee for, warranty, support, - indemnity or liability obligations to one or more recipients of Covered - Software. However, You may do so only on Your own behalf, and not on - behalf of any Contributor. You must make it absolutely clear that any - such warranty, support, indemnity, or liability obligation is offered by - You alone, and You hereby agree to indemnify every Contributor for any - liability incurred by such Contributor as a result of warranty, support, - indemnity or liability terms You offer. You may include additional - disclaimers of warranty and limitations of liability specific to any - jurisdiction. - -4. Inability to Comply Due to Statute or Regulation - - If it is impossible for You to comply with any of the terms of this License - with respect to some or all of the Covered Software due to statute, - judicial order, or regulation then You must: (a) comply with the terms of - this License to the maximum extent possible; and (b) describe the - limitations and the code they affect. Such description must be placed in a - text file included with all distributions of the Covered Software under - this License. Except to the extent prohibited by statute or regulation, - such description must be sufficiently detailed for a recipient of ordinary - skill to be able to understand it. - -5. Termination - -5.1. The rights granted under this License will terminate automatically if You - fail to comply with any of its terms. However, if You become compliant, - then the rights granted under this License from a particular Contributor - are reinstated (a) provisionally, unless and until such Contributor - explicitly and finally terminates Your grants, and (b) on an ongoing - basis, if such Contributor fails to notify You of the non-compliance by - some reasonable means prior to 60 days after You have come back into - compliance. Moreover, Your grants from a particular Contributor are - reinstated on an ongoing basis if such Contributor notifies You of the - non-compliance by some reasonable means, this is the first time You have - received notice of non-compliance with this License from such - Contributor, and You become compliant prior to 30 days after Your receipt - of the notice. - -5.2. If You initiate litigation against any entity by asserting a patent - infringement claim (excluding declaratory judgment actions, - counter-claims, and cross-claims) alleging that a Contributor Version - directly or indirectly infringes any patent, then the rights granted to - You by any and all Contributors for the Covered Software under Section - 2.1 of this License shall terminate. - -5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user - license agreements (excluding distributors and resellers) which have been - validly granted by You or Your distributors under this License prior to - termination shall survive termination. - -6. Disclaimer of Warranty - - Covered Software is provided under this License on an "as is" basis, - without warranty of any kind, either expressed, implied, or statutory, - including, without limitation, warranties that the Covered Software is free - of defects, merchantable, fit for a particular purpose or non-infringing. - The entire risk as to the quality and performance of the Covered Software - is with You. Should any Covered Software prove defective in any respect, - You (not any Contributor) assume the cost of any necessary servicing, - repair, or correction. This disclaimer of warranty constitutes an essential - part of this License. No use of any Covered Software is authorized under - this License except under this disclaimer. - -7. Limitation of Liability - - Under no circumstances and under no legal theory, whether tort (including - negligence), contract, or otherwise, shall any Contributor, or anyone who - distributes Covered Software as permitted above, be liable to You for any - direct, indirect, special, incidental, or consequential damages of any - character including, without limitation, damages for lost profits, loss of - goodwill, work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses, even if such party shall have been - informed of the possibility of such damages. This limitation of liability - shall not apply to liability for death or personal injury resulting from - such party's negligence to the extent applicable law prohibits such - limitation. Some jurisdictions do not allow the exclusion or limitation of - incidental or consequential damages, so this exclusion and limitation may - not apply to You. - -8. Litigation - - Any litigation relating to this License may be brought only in the courts - of a jurisdiction where the defendant maintains its principal place of - business and such litigation shall be governed by laws of that - jurisdiction, without reference to its conflict-of-law provisions. Nothing - in this Section shall prevent a party's ability to bring cross-claims or - counter-claims. - -9. Miscellaneous - - This License represents the complete agreement concerning the subject - matter hereof. If any provision of this License is held to be - unenforceable, such provision shall be reformed only to the extent - necessary to make it enforceable. Any law or regulation which provides that - the language of a contract shall be construed against the drafter shall not - be used to construe this License against a Contributor. - - -10. Versions of the License - -10.1. New Versions - - Mozilla Foundation is the license steward. Except as provided in Section - 10.3, no one other than the license steward has the right to modify or - publish new versions of this License. Each version will be given a - distinguishing version number. - -10.2. Effect of New Versions - - You may distribute the Covered Software under the terms of the version - of the License under which You originally received the Covered Software, - or under the terms of any subsequent version published by the license - steward. - -10.3. Modified Versions - - If you create software not governed by this License, and you want to - create a new license for such software, you may create and use a - modified version of this License if you rename the license and remove - any references to the name of the license steward (except to note that - such modified license differs from this License). - -10.4. Distributing Source Code Form that is Incompatible With Secondary - Licenses If You choose to distribute Source Code Form that is - Incompatible With Secondary Licenses under the terms of this version of - the License, the notice described in Exhibit B of this License must be - attached. - -Exhibit A - Source Code Form License Notice - - This Source Code Form is subject to the - terms of the Mozilla Public License, v. - 2.0. If a copy of the MPL was not - distributed with this file, You can - obtain one at - http://mozilla.org/MPL/2.0/. - -If it is not possible or desirable to put the notice in a particular file, -then You may include the notice in a location (such as a LICENSE file in a -relevant directory) where a recipient would be likely to look for such a -notice. - -You may add additional accurate notices of copyright ownership. - -Exhibit B - "Incompatible With Secondary Licenses" Notice - - This Source Code Form is "Incompatible - With Secondary Licenses", as defined by - the Mozilla Public License, v. 2.0. - diff --git a/vendor/github.com/shoenig/go-m1cpu/Makefile b/vendor/github.com/shoenig/go-m1cpu/Makefile deleted file mode 100644 index 28d786397d4..00000000000 --- a/vendor/github.com/shoenig/go-m1cpu/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -SHELL = bash - -default: test - -.PHONY: test -test: - @echo "--> Running Tests ..." - @go test -v -race ./... - -vet: - @echo "--> Vet Go sources ..." - @go vet ./... diff --git a/vendor/github.com/shoenig/go-m1cpu/README.md b/vendor/github.com/shoenig/go-m1cpu/README.md deleted file mode 100644 index 399657acf86..00000000000 --- a/vendor/github.com/shoenig/go-m1cpu/README.md +++ /dev/null @@ -1,66 +0,0 @@ -# m1cpu - -[![Go Reference](https://pkg.go.dev/badge/github.com/shoenig/go-m1cpu.svg)](https://pkg.go.dev/github.com/shoenig/go-m1cpu) -[![MPL License](https://img.shields.io/github/license/shoenig/go-m1cpu?color=g&style=flat-square)](https://github.com/shoenig/go-m1cpu/blob/main/LICENSE) -[![Run CI Tests](https://github.com/shoenig/go-m1cpu/actions/workflows/ci.yaml/badge.svg)](https://github.com/shoenig/go-m1cpu/actions/workflows/ci.yaml) - -The `go-m1cpu` module is a library for inspecting Apple Silicon CPUs in Go. - -Use the `m1cpu` Go package for looking up the CPU frequency for Apple M1 and M2 CPUs. - -# Install - -```shell -go get github.com/shoenig/go-m1cpu@latest -``` - -# CGO - -This package requires the use of [CGO](https://go.dev/blog/cgo). - -Extracting the CPU properties is done via Apple's [IOKit](https://developer.apple.com/documentation/iokit?language=objc) -framework, which is accessible only through system C libraries. - -# Example - -Simple Go program to print Apple Silicon M1/M2 CPU speeds. - -```go -package main - -import ( - "fmt" - - "github.com/shoenig/go-m1cpu" -) - -func main() { - fmt.Println("Apple Silicon", m1cpu.IsAppleSilicon()) - - fmt.Println("pCore GHz", m1cpu.PCoreGHz()) - fmt.Println("eCore GHz", m1cpu.ECoreGHz()) - - fmt.Println("pCore Hz", m1cpu.PCoreHz()) - fmt.Println("eCore Hz", m1cpu.ECoreHz()) -} -``` - -Using `go test` to print out available information. - -``` -➜ go test -v -run Show -=== RUN Test_Show - cpu_test.go:42: pCore Hz 3504000000 - cpu_test.go:43: eCore Hz 2424000000 - cpu_test.go:44: pCore GHz 3.504 - cpu_test.go:45: eCore GHz 2.424 - cpu_test.go:46: pCore count 8 - cpu_test.go:47: eCoreCount 4 - cpu_test.go:50: pCore Caches 196608 131072 16777216 - cpu_test.go:53: eCore Caches 131072 65536 4194304 ---- PASS: Test_Show (0.00s) -``` - -# License - -Open source under the [MPL](LICENSE) diff --git a/vendor/github.com/shoenig/go-m1cpu/cpu.go b/vendor/github.com/shoenig/go-m1cpu/cpu.go deleted file mode 100644 index 502a8cce92e..00000000000 --- a/vendor/github.com/shoenig/go-m1cpu/cpu.go +++ /dev/null @@ -1,213 +0,0 @@ -//go:build darwin && arm64 && cgo - -package m1cpu - -// #cgo LDFLAGS: -framework CoreFoundation -framework IOKit -// #include -// #include -// #include -// #include -// -// #if !defined(MAC_OS_VERSION_12_0) || MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_12_0 -// #define kIOMainPortDefault kIOMasterPortDefault -// #endif -// -// #define HzToGHz(hz) ((hz) / 1000000000.0) -// -// UInt64 global_pCoreHz; -// UInt64 global_eCoreHz; -// int global_pCoreCount; -// int global_eCoreCount; -// int global_pCoreL1InstCacheSize; -// int global_eCoreL1InstCacheSize; -// int global_pCoreL1DataCacheSize; -// int global_eCoreL1DataCacheSize; -// int global_pCoreL2CacheSize; -// int global_eCoreL2CacheSize; -// char global_brand[32]; -// -// UInt64 getFrequency(CFTypeRef typeRef) { -// CFDataRef cfData = typeRef; -// -// CFIndex size = CFDataGetLength(cfData); -// UInt8 buf[size]; -// CFDataGetBytes(cfData, CFRangeMake(0, size), buf); -// -// UInt8 b1 = buf[size-5]; -// UInt8 b2 = buf[size-6]; -// UInt8 b3 = buf[size-7]; -// UInt8 b4 = buf[size-8]; -// -// UInt64 pCoreHz = 0x00000000FFFFFFFF & ((b1<<24) | (b2 << 16) | (b3 << 8) | (b4)); -// return pCoreHz; -// } -// -// int sysctl_int(const char * name) { -// int value = -1; -// size_t size = 8; -// sysctlbyname(name, &value, &size, NULL, 0); -// return value; -// } -// -// void sysctl_string(const char * name, char * dest) { -// size_t size = 32; -// sysctlbyname(name, dest, &size, NULL, 0); -// } -// -// void initialize() { -// global_pCoreCount = sysctl_int("hw.perflevel0.physicalcpu"); -// global_eCoreCount = sysctl_int("hw.perflevel1.physicalcpu"); -// global_pCoreL1InstCacheSize = sysctl_int("hw.perflevel0.l1icachesize"); -// global_eCoreL1InstCacheSize = sysctl_int("hw.perflevel1.l1icachesize"); -// global_pCoreL1DataCacheSize = sysctl_int("hw.perflevel0.l1dcachesize"); -// global_eCoreL1DataCacheSize = sysctl_int("hw.perflevel1.l1dcachesize"); -// global_pCoreL2CacheSize = sysctl_int("hw.perflevel0.l2cachesize"); -// global_eCoreL2CacheSize = sysctl_int("hw.perflevel1.l2cachesize"); -// sysctl_string("machdep.cpu.brand_string", global_brand); -// -// CFMutableDictionaryRef matching = IOServiceMatching("AppleARMIODevice"); -// io_iterator_t iter; -// IOServiceGetMatchingServices(kIOMainPortDefault, matching, &iter); -// -// const size_t bufsize = 512; -// io_object_t obj; -// while ((obj = IOIteratorNext(iter))) { -// char class[bufsize]; -// IOObjectGetClass(obj, class); -// char name[bufsize]; -// IORegistryEntryGetName(obj, name); -// -// if (strncmp(name, "pmgr", bufsize) == 0) { -// CFTypeRef pCoreRef = IORegistryEntryCreateCFProperty(obj, CFSTR("voltage-states5-sram"), kCFAllocatorDefault, 0); -// CFTypeRef eCoreRef = IORegistryEntryCreateCFProperty(obj, CFSTR("voltage-states1-sram"), kCFAllocatorDefault, 0); -// -// long long pCoreHz = getFrequency(pCoreRef); -// long long eCoreHz = getFrequency(eCoreRef); -// -// global_pCoreHz = pCoreHz; -// global_eCoreHz = eCoreHz; -// return; -// } -// } -// } -// -// UInt64 eCoreHz() { -// return global_eCoreHz; -// } -// -// UInt64 pCoreHz() { -// return global_pCoreHz; -// } -// -// Float64 eCoreGHz() { -// return HzToGHz(global_eCoreHz); -// } -// -// Float64 pCoreGHz() { -// return HzToGHz(global_pCoreHz); -// } -// -// int pCoreCount() { -// return global_pCoreCount; -// } -// -// int eCoreCount() { -// return global_eCoreCount; -// } -// -// int pCoreL1InstCacheSize() { -// return global_pCoreL1InstCacheSize; -// } -// -// int pCoreL1DataCacheSize() { -// return global_pCoreL1DataCacheSize; -// } -// -// int pCoreL2CacheSize() { -// return global_pCoreL2CacheSize; -// } -// -// int eCoreL1InstCacheSize() { -// return global_eCoreL1InstCacheSize; -// } -// -// int eCoreL1DataCacheSize() { -// return global_eCoreL1DataCacheSize; -// } -// -// int eCoreL2CacheSize() { -// return global_eCoreL2CacheSize; -// } -// -// char * modelName() { -// return global_brand; -// } -import "C" - -func init() { - C.initialize() -} - -// IsAppleSilicon returns true on this platform. -func IsAppleSilicon() bool { - return true -} - -// PCoreHZ returns the max frequency in Hertz of the P-Core of an Apple Silicon CPU. -func PCoreHz() uint64 { - return uint64(C.pCoreHz()) -} - -// ECoreHZ returns the max frequency in Hertz of the E-Core of an Apple Silicon CPU. -func ECoreHz() uint64 { - return uint64(C.eCoreHz()) -} - -// PCoreGHz returns the max frequency in Gigahertz of the P-Core of an Apple Silicon CPU. -func PCoreGHz() float64 { - return float64(C.pCoreGHz()) -} - -// ECoreGHz returns the max frequency in Gigahertz of the E-Core of an Apple Silicon CPU. -func ECoreGHz() float64 { - return float64(C.eCoreGHz()) -} - -// PCoreCount returns the number of physical P (performance) cores. -func PCoreCount() int { - return int(C.pCoreCount()) -} - -// ECoreCount returns the number of physical E (efficiency) cores. -func ECoreCount() int { - return int(C.eCoreCount()) -} - -// PCoreCacheSize returns the sizes of the P (performance) core cache sizes -// in the order of -// -// - L1 instruction cache -// - L1 data cache -// - L2 cache -func PCoreCache() (int, int, int) { - return int(C.pCoreL1InstCacheSize()), - int(C.pCoreL1DataCacheSize()), - int(C.pCoreL2CacheSize()) -} - -// ECoreCacheSize returns the sizes of the E (efficiency) core cache sizes -// in the order of -// -// - L1 instruction cache -// - L1 data cache -// - L2 cache -func ECoreCache() (int, int, int) { - return int(C.eCoreL1InstCacheSize()), - int(C.eCoreL1DataCacheSize()), - int(C.eCoreL2CacheSize()) -} - -// ModelName returns the model name of the CPU. -func ModelName() string { - return C.GoString(C.modelName()) -} diff --git a/vendor/github.com/shoenig/go-m1cpu/incompatible.go b/vendor/github.com/shoenig/go-m1cpu/incompatible.go deleted file mode 100644 index d425025aa84..00000000000 --- a/vendor/github.com/shoenig/go-m1cpu/incompatible.go +++ /dev/null @@ -1,53 +0,0 @@ -//go:build !darwin || !arm64 || !cgo - -package m1cpu - -// IsAppleSilicon return false on this platform. -func IsAppleSilicon() bool { - return false -} - -// PCoreHZ requires darwin/arm64 -func PCoreHz() uint64 { - panic("m1cpu: not a darwin/arm64 system") -} - -// ECoreHZ requires darwin/arm64 -func ECoreHz() uint64 { - panic("m1cpu: not a darwin/arm64 system") -} - -// PCoreGHz requires darwin/arm64 -func PCoreGHz() float64 { - panic("m1cpu: not a darwin/arm64 system") -} - -// ECoreGHz requires darwin/arm64 -func ECoreGHz() float64 { - panic("m1cpu: not a darwin/arm64 system") -} - -// PCoreCount requires darwin/arm64 -func PCoreCount() int { - panic("m1cpu: not a darwin/arm64 system") -} - -// ECoreCount requires darwin/arm64 -func ECoreCount() int { - panic("m1cpu: not a darwin/arm64 system") -} - -// PCoreCacheSize requires darwin/arm64 -func PCoreCache() (int, int, int) { - panic("m1cpu: not a darwin/arm64 system") -} - -// ECoreCacheSize requires darwin/arm64 -func ECoreCache() (int, int, int) { - panic("m1cpu: not a darwin/arm64 system") -} - -// ModelName requires darwin/arm64 -func ModelName() string { - panic("m1cpu: not a darwin/arm64 system") -} diff --git a/vendor/google.golang.org/protobuf/encoding/protojson/decode.go b/vendor/google.golang.org/protobuf/encoding/protojson/decode.go index bb2966e3b4c..8f9e592f870 100644 --- a/vendor/google.golang.org/protobuf/encoding/protojson/decode.go +++ b/vendor/google.golang.org/protobuf/encoding/protojson/decode.go @@ -351,7 +351,7 @@ func (d decoder) unmarshalScalar(fd protoreflect.FieldDescriptor) (protoreflect. panic(fmt.Sprintf("unmarshalScalar: invalid scalar kind %v", kind)) } - return protoreflect.Value{}, d.newError(tok.Pos(), "invalid value for %v type: %v", kind, tok.RawString()) + return protoreflect.Value{}, d.newError(tok.Pos(), "invalid value for %v field %v: %v", kind, fd.JSONName(), tok.RawString()) } func unmarshalInt(tok json.Token, bitSize int) (protoreflect.Value, bool) { diff --git a/vendor/google.golang.org/protobuf/encoding/protojson/encode.go b/vendor/google.golang.org/protobuf/encoding/protojson/encode.go index 29846df222c..0e72d85378b 100644 --- a/vendor/google.golang.org/protobuf/encoding/protojson/encode.go +++ b/vendor/google.golang.org/protobuf/encoding/protojson/encode.go @@ -216,9 +216,7 @@ func (m unpopulatedFieldRanger) Range(f func(protoreflect.FieldDescriptor, proto } v := m.Get(fd) - isProto2Scalar := fd.Syntax() == protoreflect.Proto2 && fd.Default().IsValid() - isSingularMessage := fd.Cardinality() != protoreflect.Repeated && fd.Message() != nil - if isProto2Scalar || isSingularMessage { + if fd.HasPresence() { if m.skipNull { continue } diff --git a/vendor/google.golang.org/protobuf/internal/descopts/options.go b/vendor/google.golang.org/protobuf/internal/descopts/options.go index 8401be8c84f..024ffebd3dd 100644 --- a/vendor/google.golang.org/protobuf/internal/descopts/options.go +++ b/vendor/google.golang.org/protobuf/internal/descopts/options.go @@ -9,7 +9,7 @@ // dependency on the descriptor proto package). package descopts -import pref "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" // These variables are set by the init function in descriptor.pb.go via logic // in internal/filetype. In other words, so long as the descriptor proto package @@ -17,13 +17,13 @@ import pref "google.golang.org/protobuf/reflect/protoreflect" // // Each variable is populated with a nil pointer to the options struct. var ( - File pref.ProtoMessage - Enum pref.ProtoMessage - EnumValue pref.ProtoMessage - Message pref.ProtoMessage - Field pref.ProtoMessage - Oneof pref.ProtoMessage - ExtensionRange pref.ProtoMessage - Service pref.ProtoMessage - Method pref.ProtoMessage + File protoreflect.ProtoMessage + Enum protoreflect.ProtoMessage + EnumValue protoreflect.ProtoMessage + Message protoreflect.ProtoMessage + Field protoreflect.ProtoMessage + Oneof protoreflect.ProtoMessage + ExtensionRange protoreflect.ProtoMessage + Service protoreflect.ProtoMessage + Method protoreflect.ProtoMessage ) diff --git a/vendor/google.golang.org/protobuf/internal/editionssupport/editions.go b/vendor/google.golang.org/protobuf/internal/editionssupport/editions.go index 029a6a12d74..08dad7692c6 100644 --- a/vendor/google.golang.org/protobuf/internal/editionssupport/editions.go +++ b/vendor/google.golang.org/protobuf/internal/editionssupport/editions.go @@ -5,7 +5,7 @@ // Package editionssupport defines constants for editions that are supported. package editionssupport -import descriptorpb "google.golang.org/protobuf/types/descriptorpb" +import "google.golang.org/protobuf/types/descriptorpb" const ( Minimum = descriptorpb.Edition_EDITION_PROTO2 diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/desc.go b/vendor/google.golang.org/protobuf/internal/filedesc/desc.go index df53ff40b25..fa790e0ff19 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/desc.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/desc.go @@ -258,6 +258,7 @@ type ( StringName stringName IsProto3Optional bool // promoted from google.protobuf.FieldDescriptorProto IsWeak bool // promoted from google.protobuf.FieldOptions + IsLazy bool // promoted from google.protobuf.FieldOptions Default defaultValue ContainingOneof protoreflect.OneofDescriptor // must be consistent with Message.Oneofs.Fields Enum protoreflect.EnumDescriptor @@ -351,6 +352,7 @@ func (fd *Field) IsPacked() bool { } func (fd *Field) IsExtension() bool { return false } func (fd *Field) IsWeak() bool { return fd.L1.IsWeak } +func (fd *Field) IsLazy() bool { return fd.L1.IsLazy } func (fd *Field) IsList() bool { return fd.Cardinality() == protoreflect.Repeated && !fd.IsMap() } func (fd *Field) IsMap() bool { return fd.Message() != nil && fd.Message().IsMapEntry() } func (fd *Field) MapKey() protoreflect.FieldDescriptor { @@ -425,6 +427,7 @@ type ( Extendee protoreflect.MessageDescriptor Cardinality protoreflect.Cardinality Kind protoreflect.Kind + IsLazy bool EditionFeatures EditionFeatures } ExtensionL2 struct { @@ -465,6 +468,7 @@ func (xd *Extension) IsPacked() bool { } func (xd *Extension) IsExtension() bool { return true } func (xd *Extension) IsWeak() bool { return false } +func (xd *Extension) IsLazy() bool { return xd.L1.IsLazy } func (xd *Extension) IsList() bool { return xd.Cardinality() == protoreflect.Repeated } func (xd *Extension) IsMap() bool { return false } func (xd *Extension) MapKey() protoreflect.FieldDescriptor { return nil } diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go b/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go index 8a57d60b08c..d2f549497eb 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go @@ -495,6 +495,8 @@ func (xd *Extension) unmarshalOptions(b []byte) { switch num { case genid.FieldOptions_Packed_field_number: xd.L1.EditionFeatures.IsPacked = protowire.DecodeBool(v) + case genid.FieldOptions_Lazy_field_number: + xd.L1.IsLazy = protowire.DecodeBool(v) } case protowire.BytesType: v, m := protowire.ConsumeBytes(b) diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go b/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go index e56c91a8dbe..67a51b327c5 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go @@ -504,6 +504,8 @@ func (fd *Field) unmarshalOptions(b []byte) { fd.L1.EditionFeatures.IsPacked = protowire.DecodeBool(v) case genid.FieldOptions_Weak_field_number: fd.L1.IsWeak = protowire.DecodeBool(v) + case genid.FieldOptions_Lazy_field_number: + fd.L1.IsLazy = protowire.DecodeBool(v) case FieldOptions_EnforceUTF8: fd.L1.EditionFeatures.IsUTF8Validated = protowire.DecodeBool(v) } diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/editions.go b/vendor/google.golang.org/protobuf/internal/filedesc/editions.go index 11f5f356b66..fd4d0c83d25 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/editions.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/editions.go @@ -68,7 +68,7 @@ func unmarshalFeatureSet(b []byte, parent EditionFeatures) EditionFeatures { v, m := protowire.ConsumeBytes(b) b = b[m:] switch num { - case genid.GoFeatures_LegacyUnmarshalJsonEnum_field_number: + case genid.FeatureSet_Go_ext_number: parent = unmarshalGoFeature(v, parent) } } diff --git a/vendor/google.golang.org/protobuf/internal/genid/doc.go b/vendor/google.golang.org/protobuf/internal/genid/doc.go index 45ccd01211c..d9b9d916a20 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/doc.go +++ b/vendor/google.golang.org/protobuf/internal/genid/doc.go @@ -6,6 +6,6 @@ // and the well-known types. package genid -import protoreflect "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" const GoogleProtobuf_package protoreflect.FullName = "google.protobuf" diff --git a/vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go b/vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go index 9a652a2b424..7f67cbb6e97 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go +++ b/vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go @@ -12,20 +12,25 @@ import ( const File_google_protobuf_go_features_proto = "google/protobuf/go_features.proto" -// Names for google.protobuf.GoFeatures. +// Names for pb.GoFeatures. const ( GoFeatures_message_name protoreflect.Name = "GoFeatures" - GoFeatures_message_fullname protoreflect.FullName = "google.protobuf.GoFeatures" + GoFeatures_message_fullname protoreflect.FullName = "pb.GoFeatures" ) -// Field names for google.protobuf.GoFeatures. +// Field names for pb.GoFeatures. const ( GoFeatures_LegacyUnmarshalJsonEnum_field_name protoreflect.Name = "legacy_unmarshal_json_enum" - GoFeatures_LegacyUnmarshalJsonEnum_field_fullname protoreflect.FullName = "google.protobuf.GoFeatures.legacy_unmarshal_json_enum" + GoFeatures_LegacyUnmarshalJsonEnum_field_fullname protoreflect.FullName = "pb.GoFeatures.legacy_unmarshal_json_enum" ) -// Field numbers for google.protobuf.GoFeatures. +// Field numbers for pb.GoFeatures. const ( GoFeatures_LegacyUnmarshalJsonEnum_field_number protoreflect.FieldNumber = 1 ) + +// Extension numbers +const ( + FeatureSet_Go_ext_number protoreflect.FieldNumber = 1002 +) diff --git a/vendor/google.golang.org/protobuf/internal/genid/map_entry.go b/vendor/google.golang.org/protobuf/internal/genid/map_entry.go index 8f9ea02ff2a..bef5a25fbbf 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/map_entry.go +++ b/vendor/google.golang.org/protobuf/internal/genid/map_entry.go @@ -4,7 +4,7 @@ package genid -import protoreflect "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" // Generic field names and numbers for synthetic map entry messages. const ( diff --git a/vendor/google.golang.org/protobuf/internal/genid/wrappers.go b/vendor/google.golang.org/protobuf/internal/genid/wrappers.go index 429384b85b0..9404270de0b 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/wrappers.go +++ b/vendor/google.golang.org/protobuf/internal/genid/wrappers.go @@ -4,7 +4,7 @@ package genid -import protoreflect "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" // Generic field name and number for messages in wrappers.proto. const ( diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go b/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go index 4bb0a7a20ce..0d5b546e0ee 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go @@ -67,7 +67,6 @@ type lazyExtensionValue struct { xi *extensionFieldInfo value protoreflect.Value b []byte - fn func() protoreflect.Value } type ExtensionField struct { @@ -158,10 +157,9 @@ func (f *ExtensionField) lazyInit() { } f.lazy.value = val } else { - f.lazy.value = f.lazy.fn() + panic("No support for lazy fns for ExtensionField") } f.lazy.xi = nil - f.lazy.fn = nil f.lazy.b = nil atomic.StoreUint32(&f.lazy.atomicOnce, 1) } @@ -174,13 +172,6 @@ func (f *ExtensionField) Set(t protoreflect.ExtensionType, v protoreflect.Value) f.lazy = nil } -// SetLazy sets the type and a value that is to be lazily evaluated upon first use. -// This must not be called concurrently. -func (f *ExtensionField) SetLazy(t protoreflect.ExtensionType, fn func() protoreflect.Value) { - f.typ = t - f.lazy = &lazyExtensionValue{fn: fn} -} - // Value returns the value of the extension field. // This may be called concurrently. func (f *ExtensionField) Value() protoreflect.Value { diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_field.go b/vendor/google.golang.org/protobuf/internal/impl/codec_field.go index 78ee47e44b9..7c1f66c8c19 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_field.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_field.go @@ -65,6 +65,9 @@ func (mi *MessageInfo) initOneofFieldCoders(od protoreflect.OneofDescriptor, si if err != nil { return out, err } + if cf.funcs.isInit == nil { + out.initialized = true + } vi.Set(vw) return out, nil } diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_message.go b/vendor/google.golang.org/protobuf/internal/impl/codec_message.go index 6b2fdbb739a..78be9df3420 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_message.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_message.go @@ -189,6 +189,9 @@ func (mi *MessageInfo) makeCoderMethods(t reflect.Type, si structInfo) { if mi.methods.Merge == nil { mi.methods.Merge = mi.merge } + if mi.methods.Equal == nil { + mi.methods.Equal = equal + } } // getUnknownBytes returns a *[]byte for the unknown fields. diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_reflect.go b/vendor/google.golang.org/protobuf/internal/impl/codec_reflect.go deleted file mode 100644 index 145c577bd6b..00000000000 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_reflect.go +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package impl - -import ( - "reflect" - - "google.golang.org/protobuf/encoding/protowire" -) - -func sizeEnum(p pointer, f *coderFieldInfo, _ marshalOptions) (size int) { - v := p.v.Elem().Int() - return f.tagsize + protowire.SizeVarint(uint64(v)) -} - -func appendEnum(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - v := p.v.Elem().Int() - b = protowire.AppendVarint(b, f.wiretag) - b = protowire.AppendVarint(b, uint64(v)) - return b, nil -} - -func consumeEnum(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, _ unmarshalOptions) (out unmarshalOutput, err error) { - if wtyp != protowire.VarintType { - return out, errUnknown - } - v, n := protowire.ConsumeVarint(b) - if n < 0 { - return out, errDecode - } - p.v.Elem().SetInt(int64(v)) - out.n = n - return out, nil -} - -func mergeEnum(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - dst.v.Elem().Set(src.v.Elem()) -} - -var coderEnum = pointerCoderFuncs{ - size: sizeEnum, - marshal: appendEnum, - unmarshal: consumeEnum, - merge: mergeEnum, -} - -func sizeEnumNoZero(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - if p.v.Elem().Int() == 0 { - return 0 - } - return sizeEnum(p, f, opts) -} - -func appendEnumNoZero(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - if p.v.Elem().Int() == 0 { - return b, nil - } - return appendEnum(b, p, f, opts) -} - -func mergeEnumNoZero(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - if src.v.Elem().Int() != 0 { - dst.v.Elem().Set(src.v.Elem()) - } -} - -var coderEnumNoZero = pointerCoderFuncs{ - size: sizeEnumNoZero, - marshal: appendEnumNoZero, - unmarshal: consumeEnum, - merge: mergeEnumNoZero, -} - -func sizeEnumPtr(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - return sizeEnum(pointer{p.v.Elem()}, f, opts) -} - -func appendEnumPtr(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - return appendEnum(b, pointer{p.v.Elem()}, f, opts) -} - -func consumeEnumPtr(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { - if wtyp != protowire.VarintType { - return out, errUnknown - } - if p.v.Elem().IsNil() { - p.v.Elem().Set(reflect.New(p.v.Elem().Type().Elem())) - } - return consumeEnum(b, pointer{p.v.Elem()}, wtyp, f, opts) -} - -func mergeEnumPtr(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - if !src.v.Elem().IsNil() { - v := reflect.New(dst.v.Type().Elem().Elem()) - v.Elem().Set(src.v.Elem().Elem()) - dst.v.Elem().Set(v) - } -} - -var coderEnumPtr = pointerCoderFuncs{ - size: sizeEnumPtr, - marshal: appendEnumPtr, - unmarshal: consumeEnumPtr, - merge: mergeEnumPtr, -} - -func sizeEnumSlice(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - s := p.v.Elem() - for i, llen := 0, s.Len(); i < llen; i++ { - size += protowire.SizeVarint(uint64(s.Index(i).Int())) + f.tagsize - } - return size -} - -func appendEnumSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - s := p.v.Elem() - for i, llen := 0, s.Len(); i < llen; i++ { - b = protowire.AppendVarint(b, f.wiretag) - b = protowire.AppendVarint(b, uint64(s.Index(i).Int())) - } - return b, nil -} - -func consumeEnumSlice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { - s := p.v.Elem() - if wtyp == protowire.BytesType { - b, n := protowire.ConsumeBytes(b) - if n < 0 { - return out, errDecode - } - for len(b) > 0 { - v, n := protowire.ConsumeVarint(b) - if n < 0 { - return out, errDecode - } - rv := reflect.New(s.Type().Elem()).Elem() - rv.SetInt(int64(v)) - s.Set(reflect.Append(s, rv)) - b = b[n:] - } - out.n = n - return out, nil - } - if wtyp != protowire.VarintType { - return out, errUnknown - } - v, n := protowire.ConsumeVarint(b) - if n < 0 { - return out, errDecode - } - rv := reflect.New(s.Type().Elem()).Elem() - rv.SetInt(int64(v)) - s.Set(reflect.Append(s, rv)) - out.n = n - return out, nil -} - -func mergeEnumSlice(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - dst.v.Elem().Set(reflect.AppendSlice(dst.v.Elem(), src.v.Elem())) -} - -var coderEnumSlice = pointerCoderFuncs{ - size: sizeEnumSlice, - marshal: appendEnumSlice, - unmarshal: consumeEnumSlice, - merge: mergeEnumSlice, -} - -func sizeEnumPackedSlice(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - s := p.v.Elem() - llen := s.Len() - if llen == 0 { - return 0 - } - n := 0 - for i := 0; i < llen; i++ { - n += protowire.SizeVarint(uint64(s.Index(i).Int())) - } - return f.tagsize + protowire.SizeBytes(n) -} - -func appendEnumPackedSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - s := p.v.Elem() - llen := s.Len() - if llen == 0 { - return b, nil - } - b = protowire.AppendVarint(b, f.wiretag) - n := 0 - for i := 0; i < llen; i++ { - n += protowire.SizeVarint(uint64(s.Index(i).Int())) - } - b = protowire.AppendVarint(b, uint64(n)) - for i := 0; i < llen; i++ { - b = protowire.AppendVarint(b, uint64(s.Index(i).Int())) - } - return b, nil -} - -var coderEnumPackedSlice = pointerCoderFuncs{ - size: sizeEnumPackedSlice, - marshal: appendEnumPackedSlice, - unmarshal: consumeEnumSlice, - merge: mergeEnumSlice, -} diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go b/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go index 757642e23c9..077712c2c5a 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go @@ -2,9 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine -// +build !purego,!appengine - package impl // When using unsafe pointers, we can just treat enum values as int32s. diff --git a/vendor/google.golang.org/protobuf/internal/impl/convert.go b/vendor/google.golang.org/protobuf/internal/impl/convert.go index e06ece55a26..f72ddd882f3 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/convert.go +++ b/vendor/google.golang.org/protobuf/internal/impl/convert.go @@ -322,7 +322,7 @@ func (c *stringConverter) PBValueOf(v reflect.Value) protoreflect.Value { return protoreflect.ValueOfString(v.Convert(stringType).String()) } func (c *stringConverter) GoValueOf(v protoreflect.Value) reflect.Value { - // pref.Value.String never panics, so we go through an interface + // protoreflect.Value.String never panics, so we go through an interface // conversion here to check the type. s := v.Interface().(string) if c.goType.Kind() == reflect.Slice && s == "" { diff --git a/vendor/google.golang.org/protobuf/internal/impl/encode.go b/vendor/google.golang.org/protobuf/internal/impl/encode.go index febd2122472..6254f5de41f 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/encode.go +++ b/vendor/google.golang.org/protobuf/internal/impl/encode.go @@ -10,7 +10,7 @@ import ( "sync/atomic" "google.golang.org/protobuf/internal/flags" - proto "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/proto" piface "google.golang.org/protobuf/runtime/protoiface" ) diff --git a/vendor/google.golang.org/protobuf/internal/impl/equal.go b/vendor/google.golang.org/protobuf/internal/impl/equal.go new file mode 100644 index 00000000000..9f6c32a7d8c --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/impl/equal.go @@ -0,0 +1,224 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package impl + +import ( + "bytes" + + "google.golang.org/protobuf/encoding/protowire" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/runtime/protoiface" +) + +func equal(in protoiface.EqualInput) protoiface.EqualOutput { + return protoiface.EqualOutput{Equal: equalMessage(in.MessageA, in.MessageB)} +} + +// equalMessage is a fast-path variant of protoreflect.equalMessage. +// It takes advantage of the internal messageState type to avoid +// unnecessary allocations, type assertions. +func equalMessage(mx, my protoreflect.Message) bool { + if mx == nil || my == nil { + return mx == my + } + if mx.Descriptor() != my.Descriptor() { + return false + } + + msx, ok := mx.(*messageState) + if !ok { + return protoreflect.ValueOfMessage(mx).Equal(protoreflect.ValueOfMessage(my)) + } + msy, ok := my.(*messageState) + if !ok { + return protoreflect.ValueOfMessage(mx).Equal(protoreflect.ValueOfMessage(my)) + } + + mi := msx.messageInfo() + miy := msy.messageInfo() + if mi != miy { + return protoreflect.ValueOfMessage(mx).Equal(protoreflect.ValueOfMessage(my)) + } + mi.init() + // Compares regular fields + // Modified Message.Range code that compares two messages of the same type + // while going over the fields. + for _, ri := range mi.rangeInfos { + var fd protoreflect.FieldDescriptor + var vx, vy protoreflect.Value + + switch ri := ri.(type) { + case *fieldInfo: + hx := ri.has(msx.pointer()) + hy := ri.has(msy.pointer()) + if hx != hy { + return false + } + if !hx { + continue + } + fd = ri.fieldDesc + vx = ri.get(msx.pointer()) + vy = ri.get(msy.pointer()) + case *oneofInfo: + fnx := ri.which(msx.pointer()) + fny := ri.which(msy.pointer()) + if fnx != fny { + return false + } + if fnx <= 0 { + continue + } + fi := mi.fields[fnx] + fd = fi.fieldDesc + vx = fi.get(msx.pointer()) + vy = fi.get(msy.pointer()) + } + + if !equalValue(fd, vx, vy) { + return false + } + } + + // Compare extensions. + // This is more complicated because mx or my could have empty/nil extension maps, + // however some populated extension map values are equal to nil extension maps. + emx := mi.extensionMap(msx.pointer()) + emy := mi.extensionMap(msy.pointer()) + if emx != nil { + for k, x := range *emx { + xd := x.Type().TypeDescriptor() + xv := x.Value() + var y ExtensionField + ok := false + if emy != nil { + y, ok = (*emy)[k] + } + // We need to treat empty lists as equal to nil values + if emy == nil || !ok { + if xd.IsList() && xv.List().Len() == 0 { + continue + } + return false + } + + if !equalValue(xd, xv, y.Value()) { + return false + } + } + } + if emy != nil { + // emy may have extensions emx does not have, need to check them as well + for k, y := range *emy { + if emx != nil { + // emx has the field, so we already checked it + if _, ok := (*emx)[k]; ok { + continue + } + } + // Empty lists are equal to nil + if y.Type().TypeDescriptor().IsList() && y.Value().List().Len() == 0 { + continue + } + + // Cant be equal if the extension is populated + return false + } + } + + return equalUnknown(mx.GetUnknown(), my.GetUnknown()) +} + +func equalValue(fd protoreflect.FieldDescriptor, vx, vy protoreflect.Value) bool { + // slow path + if fd.Kind() != protoreflect.MessageKind { + return vx.Equal(vy) + } + + // fast path special cases + if fd.IsMap() { + if fd.MapValue().Kind() == protoreflect.MessageKind { + return equalMessageMap(vx.Map(), vy.Map()) + } + return vx.Equal(vy) + } + + if fd.IsList() { + return equalMessageList(vx.List(), vy.List()) + } + + return equalMessage(vx.Message(), vy.Message()) +} + +// Mostly copied from protoreflect.equalMap. +// This variant only works for messages as map types. +// All other map types should be handled via Value.Equal. +func equalMessageMap(mx, my protoreflect.Map) bool { + if mx.Len() != my.Len() { + return false + } + equal := true + mx.Range(func(k protoreflect.MapKey, vx protoreflect.Value) bool { + if !my.Has(k) { + equal = false + return false + } + vy := my.Get(k) + equal = equalMessage(vx.Message(), vy.Message()) + return equal + }) + return equal +} + +// Mostly copied from protoreflect.equalList. +// The only change is the usage of equalImpl instead of protoreflect.equalValue. +func equalMessageList(lx, ly protoreflect.List) bool { + if lx.Len() != ly.Len() { + return false + } + for i := 0; i < lx.Len(); i++ { + // We only operate on messages here since equalImpl will not call us in any other case. + if !equalMessage(lx.Get(i).Message(), ly.Get(i).Message()) { + return false + } + } + return true +} + +// equalUnknown compares unknown fields by direct comparison on the raw bytes +// of each individual field number. +// Copied from protoreflect.equalUnknown. +func equalUnknown(x, y protoreflect.RawFields) bool { + if len(x) != len(y) { + return false + } + if bytes.Equal([]byte(x), []byte(y)) { + return true + } + + mx := make(map[protoreflect.FieldNumber]protoreflect.RawFields) + my := make(map[protoreflect.FieldNumber]protoreflect.RawFields) + for len(x) > 0 { + fnum, _, n := protowire.ConsumeField(x) + mx[fnum] = append(mx[fnum], x[:n]...) + x = x[n:] + } + for len(y) > 0 { + fnum, _, n := protowire.ConsumeField(y) + my[fnum] = append(my[fnum], y[:n]...) + y = y[n:] + } + if len(mx) != len(my) { + return false + } + + for k, v1 := range mx { + if v2, ok := my[k]; !ok || !bytes.Equal([]byte(v1), []byte(v2)) { + return false + } + } + + return true +} diff --git a/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go b/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go index 6e8677ee633..b6849d66927 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go +++ b/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go @@ -160,6 +160,7 @@ func (x placeholderExtension) HasPresence() bool func (x placeholderExtension) HasOptionalKeyword() bool { return false } func (x placeholderExtension) IsExtension() bool { return true } func (x placeholderExtension) IsWeak() bool { return false } +func (x placeholderExtension) IsLazy() bool { return false } func (x placeholderExtension) IsPacked() bool { return false } func (x placeholderExtension) IsList() bool { return false } func (x placeholderExtension) IsMap() bool { return false } diff --git a/vendor/google.golang.org/protobuf/internal/impl/message.go b/vendor/google.golang.org/protobuf/internal/impl/message.go index 019399d454d..741b5ed29cf 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/message.go +++ b/vendor/google.golang.org/protobuf/internal/impl/message.go @@ -30,8 +30,8 @@ type MessageInfo struct { // Desc is the underlying message descriptor type and must be populated. Desc protoreflect.MessageDescriptor - // Exporter must be provided in a purego environment in order to provide - // access to unexported fields. + // Deprecated: Exporter will be removed the next time we bump + // protoimpl.GenVersion. See https://github.com/golang/protobuf/issues/1640 Exporter exporter // OneofWrappers is list of pointers to oneof wrapper struct types. diff --git a/vendor/google.golang.org/protobuf/internal/impl/pointer_reflect.go b/vendor/google.golang.org/protobuf/internal/impl/pointer_reflect.go deleted file mode 100644 index da685e8a29d..00000000000 --- a/vendor/google.golang.org/protobuf/internal/impl/pointer_reflect.go +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package impl - -import ( - "fmt" - "reflect" - "sync" -) - -const UnsafeEnabled = false - -// Pointer is an opaque pointer type. -type Pointer any - -// offset represents the offset to a struct field, accessible from a pointer. -// The offset is the field index into a struct. -type offset struct { - index int - export exporter -} - -// offsetOf returns a field offset for the struct field. -func offsetOf(f reflect.StructField, x exporter) offset { - if len(f.Index) != 1 { - panic("embedded structs are not supported") - } - if f.PkgPath == "" { - return offset{index: f.Index[0]} // field is already exported - } - if x == nil { - panic("exporter must be provided for unexported field") - } - return offset{index: f.Index[0], export: x} -} - -// IsValid reports whether the offset is valid. -func (f offset) IsValid() bool { return f.index >= 0 } - -// invalidOffset is an invalid field offset. -var invalidOffset = offset{index: -1} - -// zeroOffset is a noop when calling pointer.Apply. -var zeroOffset = offset{index: 0} - -// pointer is an abstract representation of a pointer to a struct or field. -type pointer struct{ v reflect.Value } - -// pointerOf returns p as a pointer. -func pointerOf(p Pointer) pointer { - return pointerOfIface(p) -} - -// pointerOfValue returns v as a pointer. -func pointerOfValue(v reflect.Value) pointer { - return pointer{v: v} -} - -// pointerOfIface returns the pointer portion of an interface. -func pointerOfIface(v any) pointer { - return pointer{v: reflect.ValueOf(v)} -} - -// IsNil reports whether the pointer is nil. -func (p pointer) IsNil() bool { - return p.v.IsNil() -} - -// Apply adds an offset to the pointer to derive a new pointer -// to a specified field. The current pointer must be pointing at a struct. -func (p pointer) Apply(f offset) pointer { - if f.export != nil { - if v := reflect.ValueOf(f.export(p.v.Interface(), f.index)); v.IsValid() { - return pointer{v: v} - } - } - return pointer{v: p.v.Elem().Field(f.index).Addr()} -} - -// AsValueOf treats p as a pointer to an object of type t and returns the value. -// It is equivalent to reflect.ValueOf(p.AsIfaceOf(t)) -func (p pointer) AsValueOf(t reflect.Type) reflect.Value { - if got := p.v.Type().Elem(); got != t { - panic(fmt.Sprintf("invalid type: got %v, want %v", got, t)) - } - return p.v -} - -// AsIfaceOf treats p as a pointer to an object of type t and returns the value. -// It is equivalent to p.AsValueOf(t).Interface() -func (p pointer) AsIfaceOf(t reflect.Type) any { - return p.AsValueOf(t).Interface() -} - -func (p pointer) Bool() *bool { return p.v.Interface().(*bool) } -func (p pointer) BoolPtr() **bool { return p.v.Interface().(**bool) } -func (p pointer) BoolSlice() *[]bool { return p.v.Interface().(*[]bool) } -func (p pointer) Int32() *int32 { return p.v.Interface().(*int32) } -func (p pointer) Int32Ptr() **int32 { return p.v.Interface().(**int32) } -func (p pointer) Int32Slice() *[]int32 { return p.v.Interface().(*[]int32) } -func (p pointer) Int64() *int64 { return p.v.Interface().(*int64) } -func (p pointer) Int64Ptr() **int64 { return p.v.Interface().(**int64) } -func (p pointer) Int64Slice() *[]int64 { return p.v.Interface().(*[]int64) } -func (p pointer) Uint32() *uint32 { return p.v.Interface().(*uint32) } -func (p pointer) Uint32Ptr() **uint32 { return p.v.Interface().(**uint32) } -func (p pointer) Uint32Slice() *[]uint32 { return p.v.Interface().(*[]uint32) } -func (p pointer) Uint64() *uint64 { return p.v.Interface().(*uint64) } -func (p pointer) Uint64Ptr() **uint64 { return p.v.Interface().(**uint64) } -func (p pointer) Uint64Slice() *[]uint64 { return p.v.Interface().(*[]uint64) } -func (p pointer) Float32() *float32 { return p.v.Interface().(*float32) } -func (p pointer) Float32Ptr() **float32 { return p.v.Interface().(**float32) } -func (p pointer) Float32Slice() *[]float32 { return p.v.Interface().(*[]float32) } -func (p pointer) Float64() *float64 { return p.v.Interface().(*float64) } -func (p pointer) Float64Ptr() **float64 { return p.v.Interface().(**float64) } -func (p pointer) Float64Slice() *[]float64 { return p.v.Interface().(*[]float64) } -func (p pointer) String() *string { return p.v.Interface().(*string) } -func (p pointer) StringPtr() **string { return p.v.Interface().(**string) } -func (p pointer) StringSlice() *[]string { return p.v.Interface().(*[]string) } -func (p pointer) Bytes() *[]byte { return p.v.Interface().(*[]byte) } -func (p pointer) BytesPtr() **[]byte { return p.v.Interface().(**[]byte) } -func (p pointer) BytesSlice() *[][]byte { return p.v.Interface().(*[][]byte) } -func (p pointer) WeakFields() *weakFields { return (*weakFields)(p.v.Interface().(*WeakFields)) } -func (p pointer) Extensions() *map[int32]ExtensionField { - return p.v.Interface().(*map[int32]ExtensionField) -} - -func (p pointer) Elem() pointer { - return pointer{v: p.v.Elem()} -} - -// PointerSlice copies []*T from p as a new []pointer. -// This behavior differs from the implementation in pointer_unsafe.go. -func (p pointer) PointerSlice() []pointer { - // TODO: reconsider this - if p.v.IsNil() { - return nil - } - n := p.v.Elem().Len() - s := make([]pointer, n) - for i := 0; i < n; i++ { - s[i] = pointer{v: p.v.Elem().Index(i)} - } - return s -} - -// AppendPointerSlice appends v to p, which must be a []*T. -func (p pointer) AppendPointerSlice(v pointer) { - sp := p.v.Elem() - sp.Set(reflect.Append(sp, v.v)) -} - -// SetPointer sets *p to v. -func (p pointer) SetPointer(v pointer) { - p.v.Elem().Set(v.v) -} - -func growSlice(p pointer, addCap int) { - // TODO: Once we only support Go 1.20 and newer, use reflect.Grow. - in := p.v.Elem() - out := reflect.MakeSlice(in.Type(), in.Len(), in.Len()+addCap) - reflect.Copy(out, in) - p.v.Elem().Set(out) -} - -func (p pointer) growBoolSlice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growInt32Slice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growUint32Slice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growInt64Slice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growUint64Slice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growFloat64Slice(addCap int) { - growSlice(p, addCap) -} - -func (p pointer) growFloat32Slice(addCap int) { - growSlice(p, addCap) -} - -func (Export) MessageStateOf(p Pointer) *messageState { panic("not supported") } -func (ms *messageState) pointer() pointer { panic("not supported") } -func (ms *messageState) messageInfo() *MessageInfo { panic("not supported") } -func (ms *messageState) LoadMessageInfo() *MessageInfo { panic("not supported") } -func (ms *messageState) StoreMessageInfo(mi *MessageInfo) { panic("not supported") } - -type atomicNilMessage struct { - once sync.Once - m messageReflectWrapper -} - -func (m *atomicNilMessage) Init(mi *MessageInfo) *messageReflectWrapper { - m.once.Do(func() { - m.m.p = pointerOfIface(reflect.Zero(mi.GoReflectType).Interface()) - m.m.mi = mi - }) - return &m.m -} diff --git a/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go b/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go index 5f20ca5d8ab..79e186667b7 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go +++ b/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go @@ -2,9 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine -// +build !purego,!appengine - package impl import ( diff --git a/vendor/google.golang.org/protobuf/internal/strs/strings_pure.go b/vendor/google.golang.org/protobuf/internal/strs/strings_pure.go deleted file mode 100644 index a1f6f333860..00000000000 --- a/vendor/google.golang.org/protobuf/internal/strs/strings_pure.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package strs - -import pref "google.golang.org/protobuf/reflect/protoreflect" - -func UnsafeString(b []byte) string { - return string(b) -} - -func UnsafeBytes(s string) []byte { - return []byte(s) -} - -type Builder struct{} - -func (*Builder) AppendFullName(prefix pref.FullName, name pref.Name) pref.FullName { - return prefix.Append(name) -} - -func (*Builder) MakeString(b []byte) string { - return string(b) -} diff --git a/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go120.go b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go120.go index a008acd0908..832a7988f14 100644 --- a/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go120.go +++ b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go120.go @@ -2,8 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine && !go1.21 -// +build !purego,!appengine,!go1.21 +//go:build !go1.21 package strs diff --git a/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go index 60166f2ba3c..1ffddf6877a 100644 --- a/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go +++ b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go @@ -2,8 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine && go1.21 -// +build !purego,!appengine,go1.21 +//go:build go1.21 package strs diff --git a/vendor/google.golang.org/protobuf/internal/version/version.go b/vendor/google.golang.org/protobuf/internal/version/version.go index dbbf1f6862c..fb8e15e8dad 100644 --- a/vendor/google.golang.org/protobuf/internal/version/version.go +++ b/vendor/google.golang.org/protobuf/internal/version/version.go @@ -51,8 +51,8 @@ import ( // 10. Send out the CL for review and submit it. const ( Major = 1 - Minor = 34 - Patch = 2 + Minor = 35 + Patch = 1 PreRelease = "" ) diff --git a/vendor/google.golang.org/protobuf/proto/equal.go b/vendor/google.golang.org/protobuf/proto/equal.go index 1a0be1b03c7..c36d4a9cd75 100644 --- a/vendor/google.golang.org/protobuf/proto/equal.go +++ b/vendor/google.golang.org/protobuf/proto/equal.go @@ -8,6 +8,7 @@ import ( "reflect" "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/runtime/protoiface" ) // Equal reports whether two messages are equal, @@ -51,6 +52,14 @@ func Equal(x, y Message) bool { if mx.IsValid() != my.IsValid() { return false } + + // Only one of the messages needs to implement the fast-path for it to work. + pmx := protoMethods(mx) + pmy := protoMethods(my) + if pmx != nil && pmy != nil && pmx.Equal != nil && pmy.Equal != nil { + return pmx.Equal(protoiface.EqualInput{MessageA: mx, MessageB: my}).Equal + } + vx := protoreflect.ValueOfMessage(mx) vy := protoreflect.ValueOfMessage(my) return vx.Equal(vy) diff --git a/vendor/google.golang.org/protobuf/proto/extension.go b/vendor/google.golang.org/protobuf/proto/extension.go index d248f292846..78445d116f7 100644 --- a/vendor/google.golang.org/protobuf/proto/extension.go +++ b/vendor/google.golang.org/protobuf/proto/extension.go @@ -39,6 +39,48 @@ func ClearExtension(m Message, xt protoreflect.ExtensionType) { // If the field is unpopulated, it returns the default value for // scalars and an immutable, empty value for lists or messages. // It panics if xt does not extend m. +// +// The type of the value is dependent on the field type of the extension. +// For extensions generated by protoc-gen-go, the Go type is as follows: +// +// ╔═══════════════════╤═════════════════════════╗ +// ║ Go type │ Protobuf kind ║ +// ╠═══════════════════╪═════════════════════════╣ +// ║ bool │ bool ║ +// ║ int32 │ int32, sint32, sfixed32 ║ +// ║ int64 │ int64, sint64, sfixed64 ║ +// ║ uint32 │ uint32, fixed32 ║ +// ║ uint64 │ uint64, fixed64 ║ +// ║ float32 │ float ║ +// ║ float64 │ double ║ +// ║ string │ string ║ +// ║ []byte │ bytes ║ +// ║ protoreflect.Enum │ enum ║ +// ║ proto.Message │ message, group ║ +// ╚═══════════════════╧═════════════════════════╝ +// +// The protoreflect.Enum and proto.Message types are the concrete Go type +// associated with the named enum or message. Repeated fields are represented +// using a Go slice of the base element type. +// +// If a generated extension descriptor variable is directly passed to +// GetExtension, then the call should be followed immediately by a +// type assertion to the expected output value. For example: +// +// mm := proto.GetExtension(m, foopb.E_MyExtension).(*foopb.MyMessage) +// +// This pattern enables static analysis tools to verify that the asserted type +// matches the Go type associated with the extension field and +// also enables a possible future migration to a type-safe extension API. +// +// Since singular messages are the most common extension type, the pattern of +// calling HasExtension followed by GetExtension may be simplified to: +// +// if mm := proto.GetExtension(m, foopb.E_MyExtension).(*foopb.MyMessage); mm != nil { +// ... // make use of mm +// } +// +// The mm variable is non-nil if and only if HasExtension reports true. func GetExtension(m Message, xt protoreflect.ExtensionType) any { // Treat nil message interface as an empty message; return the default. if m == nil { @@ -51,6 +93,35 @@ func GetExtension(m Message, xt protoreflect.ExtensionType) any { // SetExtension stores the value of an extension field. // It panics if m is invalid, xt does not extend m, or if type of v // is invalid for the specified extension field. +// +// The type of the value is dependent on the field type of the extension. +// For extensions generated by protoc-gen-go, the Go type is as follows: +// +// ╔═══════════════════╤═════════════════════════╗ +// ║ Go type │ Protobuf kind ║ +// ╠═══════════════════╪═════════════════════════╣ +// ║ bool │ bool ║ +// ║ int32 │ int32, sint32, sfixed32 ║ +// ║ int64 │ int64, sint64, sfixed64 ║ +// ║ uint32 │ uint32, fixed32 ║ +// ║ uint64 │ uint64, fixed64 ║ +// ║ float32 │ float ║ +// ║ float64 │ double ║ +// ║ string │ string ║ +// ║ []byte │ bytes ║ +// ║ protoreflect.Enum │ enum ║ +// ║ proto.Message │ message, group ║ +// ╚═══════════════════╧═════════════════════════╝ +// +// The protoreflect.Enum and proto.Message types are the concrete Go type +// associated with the named enum or message. Repeated fields are represented +// using a Go slice of the base element type. +// +// If a generated extension descriptor variable is directly passed to +// SetExtension (e.g., foopb.E_MyExtension), then the value should be a +// concrete type that matches the expected Go type for the extension descriptor +// so that static analysis tools can verify type correctness. +// This also enables a possible future migration to a type-safe extension API. func SetExtension(m Message, xt protoreflect.ExtensionType, v any) { xd := xt.TypeDescriptor() pv := xt.ValueOf(v) diff --git a/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go b/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go index 85617554272..ebcb4a8ab13 100644 --- a/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go +++ b/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go @@ -150,6 +150,7 @@ func (r descsByName) initFieldsFromDescriptorProto(fds []*descriptorpb.FieldDesc opts = proto.Clone(opts).(*descriptorpb.FieldOptions) f.L1.Options = func() protoreflect.ProtoMessage { return opts } f.L1.IsWeak = opts.GetWeak() + f.L1.IsLazy = opts.GetLazy() if opts.Packed != nil { f.L1.EditionFeatures.IsPacked = opts.GetPacked() } @@ -214,6 +215,9 @@ func (r descsByName) initExtensionDeclarations(xds []*descriptorpb.FieldDescript if xd.JsonName != nil { x.L2.StringName.InitJSON(xd.GetJsonName()) } + if x.L1.Kind == protoreflect.MessageKind && x.L1.EditionFeatures.IsDelimitedEncoded { + x.L1.Kind = protoreflect.GroupKind + } } return xs, nil } diff --git a/vendor/google.golang.org/protobuf/reflect/protodesc/editions.go b/vendor/google.golang.org/protobuf/reflect/protodesc/editions.go index 804830eda36..002e0047aea 100644 --- a/vendor/google.golang.org/protobuf/reflect/protodesc/editions.go +++ b/vendor/google.golang.org/protobuf/reflect/protodesc/editions.go @@ -14,7 +14,7 @@ import ( "google.golang.org/protobuf/proto" "google.golang.org/protobuf/reflect/protoreflect" "google.golang.org/protobuf/types/descriptorpb" - gofeaturespb "google.golang.org/protobuf/types/gofeaturespb" + "google.golang.org/protobuf/types/gofeaturespb" ) var defaults = &descriptorpb.FeatureSetDefaults{} diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go index d5d5af6ebed..742cb518c40 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go @@ -23,6 +23,7 @@ type ( Unmarshal func(unmarshalInput) (unmarshalOutput, error) Merge func(mergeInput) mergeOutput CheckInitialized func(checkInitializedInput) (checkInitializedOutput, error) + Equal func(equalInput) equalOutput } supportFlags = uint64 sizeInput = struct { @@ -75,4 +76,13 @@ type ( checkInitializedOutput = struct { pragma.NoUnkeyedLiterals } + equalInput = struct { + pragma.NoUnkeyedLiterals + MessageA Message + MessageB Message + } + equalOutput = struct { + pragma.NoUnkeyedLiterals + Equal bool + } ) diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_pure.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_pure.go deleted file mode 100644 index 75f83a2af03..00000000000 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_pure.go +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package protoreflect - -import "google.golang.org/protobuf/internal/pragma" - -type valueType int - -const ( - nilType valueType = iota - boolType - int32Type - int64Type - uint32Type - uint64Type - float32Type - float64Type - stringType - bytesType - enumType - ifaceType -) - -// value is a union where only one type can be represented at a time. -// This uses a distinct field for each type. This is type safe in Go, but -// occupies more memory than necessary (72B). -type value struct { - pragma.DoNotCompare // 0B - - typ valueType // 8B - num uint64 // 8B - str string // 16B - bin []byte // 24B - iface any // 16B -} - -func valueOfString(v string) Value { - return Value{typ: stringType, str: v} -} -func valueOfBytes(v []byte) Value { - return Value{typ: bytesType, bin: v} -} -func valueOfIface(v any) Value { - return Value{typ: ifaceType, iface: v} -} - -func (v Value) getString() string { - return v.str -} -func (v Value) getBytes() []byte { - return v.bin -} -func (v Value) getIface() any { - return v.iface -} diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go120.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go120.go index 7f3583ead81..0015fcb35d8 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go120.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go120.go @@ -2,8 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine && !go1.21 -// +build !purego,!appengine,!go1.21 +//go:build !go1.21 package protoreflect diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go index f7d386990a0..479527b58dd 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go @@ -2,8 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine && go1.21 -// +build !purego,!appengine,go1.21 +//go:build go1.21 package protoreflect diff --git a/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go b/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go index 44cf467d884..246156561ce 100644 --- a/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go +++ b/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go @@ -39,6 +39,9 @@ type Methods = struct { // CheckInitialized returns an error if any required fields in the message are not set. CheckInitialized func(CheckInitializedInput) (CheckInitializedOutput, error) + + // Equal compares two messages and returns EqualOutput.Equal == true if they are equal. + Equal func(EqualInput) EqualOutput } // SupportFlags indicate support for optional features. @@ -166,3 +169,18 @@ type CheckInitializedInput = struct { type CheckInitializedOutput = struct { pragma.NoUnkeyedLiterals } + +// EqualInput is input to the Equal method. +type EqualInput = struct { + pragma.NoUnkeyedLiterals + + MessageA protoreflect.Message + MessageB protoreflect.Message +} + +// EqualOutput is output from the Equal method. +type EqualOutput = struct { + pragma.NoUnkeyedLiterals + + Equal bool +} diff --git a/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go b/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go index 9403eb07507..6dea75cd5b1 100644 --- a/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go +++ b/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go @@ -1217,11 +1217,9 @@ type FileDescriptorSet struct { func (x *FileDescriptorSet) Reset() { *x = FileDescriptorSet{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FileDescriptorSet) String() string { @@ -1232,7 +1230,7 @@ func (*FileDescriptorSet) ProtoMessage() {} func (x *FileDescriptorSet) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1291,11 +1289,9 @@ type FileDescriptorProto struct { func (x *FileDescriptorProto) Reset() { *x = FileDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FileDescriptorProto) String() string { @@ -1306,7 +1302,7 @@ func (*FileDescriptorProto) ProtoMessage() {} func (x *FileDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1434,11 +1430,9 @@ type DescriptorProto struct { func (x *DescriptorProto) Reset() { *x = DescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *DescriptorProto) String() string { @@ -1449,7 +1443,7 @@ func (*DescriptorProto) ProtoMessage() {} func (x *DescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[2] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1561,11 +1555,9 @@ const ( func (x *ExtensionRangeOptions) Reset() { *x = ExtensionRangeOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[3] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ExtensionRangeOptions) String() string { @@ -1576,7 +1568,7 @@ func (*ExtensionRangeOptions) ProtoMessage() {} func (x *ExtensionRangeOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[3] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1680,11 +1672,9 @@ type FieldDescriptorProto struct { func (x *FieldDescriptorProto) Reset() { *x = FieldDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[4] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldDescriptorProto) String() string { @@ -1695,7 +1685,7 @@ func (*FieldDescriptorProto) ProtoMessage() {} func (x *FieldDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[4] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1799,11 +1789,9 @@ type OneofDescriptorProto struct { func (x *OneofDescriptorProto) Reset() { *x = OneofDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[5] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *OneofDescriptorProto) String() string { @@ -1814,7 +1802,7 @@ func (*OneofDescriptorProto) ProtoMessage() {} func (x *OneofDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[5] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1863,11 +1851,9 @@ type EnumDescriptorProto struct { func (x *EnumDescriptorProto) Reset() { *x = EnumDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[6] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumDescriptorProto) String() string { @@ -1878,7 +1864,7 @@ func (*EnumDescriptorProto) ProtoMessage() {} func (x *EnumDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[6] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1941,11 +1927,9 @@ type EnumValueDescriptorProto struct { func (x *EnumValueDescriptorProto) Reset() { *x = EnumValueDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[7] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumValueDescriptorProto) String() string { @@ -1956,7 +1940,7 @@ func (*EnumValueDescriptorProto) ProtoMessage() {} func (x *EnumValueDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[7] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2005,11 +1989,9 @@ type ServiceDescriptorProto struct { func (x *ServiceDescriptorProto) Reset() { *x = ServiceDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[8] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ServiceDescriptorProto) String() string { @@ -2020,7 +2002,7 @@ func (*ServiceDescriptorProto) ProtoMessage() {} func (x *ServiceDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[8] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2082,11 +2064,9 @@ const ( func (x *MethodDescriptorProto) Reset() { *x = MethodDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[9] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *MethodDescriptorProto) String() string { @@ -2097,7 +2077,7 @@ func (*MethodDescriptorProto) ProtoMessage() {} func (x *MethodDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[9] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2267,11 +2247,9 @@ const ( func (x *FileOptions) Reset() { *x = FileOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[10] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FileOptions) String() string { @@ -2282,7 +2260,7 @@ func (*FileOptions) ProtoMessage() {} func (x *FileOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[10] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2534,11 +2512,9 @@ const ( func (x *MessageOptions) Reset() { *x = MessageOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[11] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *MessageOptions) String() string { @@ -2549,7 +2525,7 @@ func (*MessageOptions) ProtoMessage() {} func (x *MessageOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[11] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2707,11 +2683,9 @@ const ( func (x *FieldOptions) Reset() { *x = FieldOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[12] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldOptions) String() string { @@ -2722,7 +2696,7 @@ func (*FieldOptions) ProtoMessage() {} func (x *FieldOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[12] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2849,11 +2823,9 @@ type OneofOptions struct { func (x *OneofOptions) Reset() { *x = OneofOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[13] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *OneofOptions) String() string { @@ -2864,7 +2836,7 @@ func (*OneofOptions) ProtoMessage() {} func (x *OneofOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[13] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2929,11 +2901,9 @@ const ( func (x *EnumOptions) Reset() { *x = EnumOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[14] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumOptions) String() string { @@ -2944,7 +2914,7 @@ func (*EnumOptions) ProtoMessage() {} func (x *EnumOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[14] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3026,11 +2996,9 @@ const ( func (x *EnumValueOptions) Reset() { *x = EnumValueOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[15] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[15] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumValueOptions) String() string { @@ -3041,7 +3009,7 @@ func (*EnumValueOptions) ProtoMessage() {} func (x *EnumValueOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[15] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3115,11 +3083,9 @@ const ( func (x *ServiceOptions) Reset() { *x = ServiceOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[16] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[16] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ServiceOptions) String() string { @@ -3130,7 +3096,7 @@ func (*ServiceOptions) ProtoMessage() {} func (x *ServiceOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[16] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3192,11 +3158,9 @@ const ( func (x *MethodOptions) Reset() { *x = MethodOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[17] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[17] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *MethodOptions) String() string { @@ -3207,7 +3171,7 @@ func (*MethodOptions) ProtoMessage() {} func (x *MethodOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[17] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3274,11 +3238,9 @@ type UninterpretedOption struct { func (x *UninterpretedOption) Reset() { *x = UninterpretedOption{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[18] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[18] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *UninterpretedOption) String() string { @@ -3289,7 +3251,7 @@ func (*UninterpretedOption) ProtoMessage() {} func (x *UninterpretedOption) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[18] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3375,11 +3337,9 @@ type FeatureSet struct { func (x *FeatureSet) Reset() { *x = FeatureSet{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[19] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[19] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FeatureSet) String() string { @@ -3390,7 +3350,7 @@ func (*FeatureSet) ProtoMessage() {} func (x *FeatureSet) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[19] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3467,11 +3427,9 @@ type FeatureSetDefaults struct { func (x *FeatureSetDefaults) Reset() { *x = FeatureSetDefaults{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[20] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[20] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FeatureSetDefaults) String() string { @@ -3482,7 +3440,7 @@ func (*FeatureSetDefaults) ProtoMessage() {} func (x *FeatureSetDefaults) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[20] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3578,11 +3536,9 @@ type SourceCodeInfo struct { func (x *SourceCodeInfo) Reset() { *x = SourceCodeInfo{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[21] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[21] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *SourceCodeInfo) String() string { @@ -3593,7 +3549,7 @@ func (*SourceCodeInfo) ProtoMessage() {} func (x *SourceCodeInfo) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[21] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3630,11 +3586,9 @@ type GeneratedCodeInfo struct { func (x *GeneratedCodeInfo) Reset() { *x = GeneratedCodeInfo{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[22] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[22] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *GeneratedCodeInfo) String() string { @@ -3645,7 +3599,7 @@ func (*GeneratedCodeInfo) ProtoMessage() {} func (x *GeneratedCodeInfo) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[22] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3679,11 +3633,9 @@ type DescriptorProto_ExtensionRange struct { func (x *DescriptorProto_ExtensionRange) Reset() { *x = DescriptorProto_ExtensionRange{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[23] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[23] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *DescriptorProto_ExtensionRange) String() string { @@ -3694,7 +3646,7 @@ func (*DescriptorProto_ExtensionRange) ProtoMessage() {} func (x *DescriptorProto_ExtensionRange) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[23] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3744,11 +3696,9 @@ type DescriptorProto_ReservedRange struct { func (x *DescriptorProto_ReservedRange) Reset() { *x = DescriptorProto_ReservedRange{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[24] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[24] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *DescriptorProto_ReservedRange) String() string { @@ -3759,7 +3709,7 @@ func (*DescriptorProto_ReservedRange) ProtoMessage() {} func (x *DescriptorProto_ReservedRange) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[24] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3813,11 +3763,9 @@ type ExtensionRangeOptions_Declaration struct { func (x *ExtensionRangeOptions_Declaration) Reset() { *x = ExtensionRangeOptions_Declaration{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[25] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[25] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ExtensionRangeOptions_Declaration) String() string { @@ -3828,7 +3776,7 @@ func (*ExtensionRangeOptions_Declaration) ProtoMessage() {} func (x *ExtensionRangeOptions_Declaration) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[25] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3895,11 +3843,9 @@ type EnumDescriptorProto_EnumReservedRange struct { func (x *EnumDescriptorProto_EnumReservedRange) Reset() { *x = EnumDescriptorProto_EnumReservedRange{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[26] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[26] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumDescriptorProto_EnumReservedRange) String() string { @@ -3910,7 +3856,7 @@ func (*EnumDescriptorProto_EnumReservedRange) ProtoMessage() {} func (x *EnumDescriptorProto_EnumReservedRange) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[26] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3950,11 +3896,9 @@ type FieldOptions_EditionDefault struct { func (x *FieldOptions_EditionDefault) Reset() { *x = FieldOptions_EditionDefault{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[27] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[27] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldOptions_EditionDefault) String() string { @@ -3965,7 +3909,7 @@ func (*FieldOptions_EditionDefault) ProtoMessage() {} func (x *FieldOptions_EditionDefault) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[27] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -4018,11 +3962,9 @@ type FieldOptions_FeatureSupport struct { func (x *FieldOptions_FeatureSupport) Reset() { *x = FieldOptions_FeatureSupport{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[28] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[28] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldOptions_FeatureSupport) String() string { @@ -4033,7 +3975,7 @@ func (*FieldOptions_FeatureSupport) ProtoMessage() {} func (x *FieldOptions_FeatureSupport) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[28] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -4092,11 +4034,9 @@ type UninterpretedOption_NamePart struct { func (x *UninterpretedOption_NamePart) Reset() { *x = UninterpretedOption_NamePart{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[29] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[29] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *UninterpretedOption_NamePart) String() string { @@ -4107,7 +4047,7 @@ func (*UninterpretedOption_NamePart) ProtoMessage() {} func (x *UninterpretedOption_NamePart) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[29] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -4154,11 +4094,9 @@ type FeatureSetDefaults_FeatureSetEditionDefault struct { func (x *FeatureSetDefaults_FeatureSetEditionDefault) Reset() { *x = FeatureSetDefaults_FeatureSetEditionDefault{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[30] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[30] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FeatureSetDefaults_FeatureSetEditionDefault) String() string { @@ -4169,7 +4107,7 @@ func (*FeatureSetDefaults_FeatureSetEditionDefault) ProtoMessage() {} func (x *FeatureSetDefaults_FeatureSetEditionDefault) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[30] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -4305,11 +4243,9 @@ type SourceCodeInfo_Location struct { func (x *SourceCodeInfo_Location) Reset() { *x = SourceCodeInfo_Location{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[31] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[31] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *SourceCodeInfo_Location) String() string { @@ -4320,7 +4256,7 @@ func (*SourceCodeInfo_Location) ProtoMessage() {} func (x *SourceCodeInfo_Location) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[31] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -4392,11 +4328,9 @@ type GeneratedCodeInfo_Annotation struct { func (x *GeneratedCodeInfo_Annotation) Reset() { *x = GeneratedCodeInfo_Annotation{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[32] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[32] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *GeneratedCodeInfo_Annotation) String() string { @@ -4407,7 +4341,7 @@ func (*GeneratedCodeInfo_Annotation) ProtoMessage() {} func (x *GeneratedCodeInfo_Annotation) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[32] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -5385,424 +5319,6 @@ func file_google_protobuf_descriptor_proto_init() { if File_google_protobuf_descriptor_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_descriptor_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*FileDescriptorSet); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[1].Exporter = func(v any, i int) any { - switch v := v.(*FileDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[2].Exporter = func(v any, i int) any { - switch v := v.(*DescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[3].Exporter = func(v any, i int) any { - switch v := v.(*ExtensionRangeOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[4].Exporter = func(v any, i int) any { - switch v := v.(*FieldDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[5].Exporter = func(v any, i int) any { - switch v := v.(*OneofDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[6].Exporter = func(v any, i int) any { - switch v := v.(*EnumDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[7].Exporter = func(v any, i int) any { - switch v := v.(*EnumValueDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[8].Exporter = func(v any, i int) any { - switch v := v.(*ServiceDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[9].Exporter = func(v any, i int) any { - switch v := v.(*MethodDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[10].Exporter = func(v any, i int) any { - switch v := v.(*FileOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[11].Exporter = func(v any, i int) any { - switch v := v.(*MessageOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[12].Exporter = func(v any, i int) any { - switch v := v.(*FieldOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[13].Exporter = func(v any, i int) any { - switch v := v.(*OneofOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[14].Exporter = func(v any, i int) any { - switch v := v.(*EnumOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[15].Exporter = func(v any, i int) any { - switch v := v.(*EnumValueOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[16].Exporter = func(v any, i int) any { - switch v := v.(*ServiceOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[17].Exporter = func(v any, i int) any { - switch v := v.(*MethodOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[18].Exporter = func(v any, i int) any { - switch v := v.(*UninterpretedOption); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[19].Exporter = func(v any, i int) any { - switch v := v.(*FeatureSet); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[20].Exporter = func(v any, i int) any { - switch v := v.(*FeatureSetDefaults); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[21].Exporter = func(v any, i int) any { - switch v := v.(*SourceCodeInfo); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[22].Exporter = func(v any, i int) any { - switch v := v.(*GeneratedCodeInfo); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[23].Exporter = func(v any, i int) any { - switch v := v.(*DescriptorProto_ExtensionRange); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[24].Exporter = func(v any, i int) any { - switch v := v.(*DescriptorProto_ReservedRange); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[25].Exporter = func(v any, i int) any { - switch v := v.(*ExtensionRangeOptions_Declaration); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[26].Exporter = func(v any, i int) any { - switch v := v.(*EnumDescriptorProto_EnumReservedRange); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[27].Exporter = func(v any, i int) any { - switch v := v.(*FieldOptions_EditionDefault); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[28].Exporter = func(v any, i int) any { - switch v := v.(*FieldOptions_FeatureSupport); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[29].Exporter = func(v any, i int) any { - switch v := v.(*UninterpretedOption_NamePart); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[30].Exporter = func(v any, i int) any { - switch v := v.(*FeatureSetDefaults_FeatureSetEditionDefault); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[31].Exporter = func(v any, i int) any { - switch v := v.(*SourceCodeInfo_Location); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[32].Exporter = func(v any, i int) any { - switch v := v.(*GeneratedCodeInfo_Annotation); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go b/vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go index a2ca940c50f..c7e860fcd6d 100644 --- a/vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go +++ b/vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go @@ -29,11 +29,9 @@ type GoFeatures struct { func (x *GoFeatures) Reset() { *x = GoFeatures{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_go_features_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_go_features_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *GoFeatures) String() string { @@ -44,7 +42,7 @@ func (*GoFeatures) ProtoMessage() {} func (x *GoFeatures) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_go_features_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -145,20 +143,6 @@ func file_google_protobuf_go_features_proto_init() { if File_google_protobuf_go_features_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_go_features_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*GoFeatures); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/known/anypb/any.pb.go b/vendor/google.golang.org/protobuf/types/known/anypb/any.pb.go index 7172b43d383..87da199a386 100644 --- a/vendor/google.golang.org/protobuf/types/known/anypb/any.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/anypb/any.pb.go @@ -368,11 +368,9 @@ func (x *Any) UnmarshalNew() (proto.Message, error) { func (x *Any) Reset() { *x = Any{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_any_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_any_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Any) String() string { @@ -383,7 +381,7 @@ func (*Any) ProtoMessage() {} func (x *Any) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_any_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -461,20 +459,6 @@ func file_google_protobuf_any_proto_init() { if File_google_protobuf_any_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_any_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*Any); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/known/durationpb/duration.pb.go b/vendor/google.golang.org/protobuf/types/known/durationpb/duration.pb.go index 1b71bcd910a..b99d4d24109 100644 --- a/vendor/google.golang.org/protobuf/types/known/durationpb/duration.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/durationpb/duration.pb.go @@ -245,11 +245,9 @@ func (x *Duration) check() uint { func (x *Duration) Reset() { *x = Duration{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_duration_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_duration_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Duration) String() string { @@ -260,7 +258,7 @@ func (*Duration) ProtoMessage() {} func (x *Duration) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_duration_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -339,20 +337,6 @@ func file_google_protobuf_duration_proto_init() { if File_google_protobuf_duration_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_duration_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*Duration); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/known/emptypb/empty.pb.go b/vendor/google.golang.org/protobuf/types/known/emptypb/empty.pb.go index d87b4fb8281..1761bc9c69a 100644 --- a/vendor/google.golang.org/protobuf/types/known/emptypb/empty.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/emptypb/empty.pb.go @@ -55,11 +55,9 @@ type Empty struct { func (x *Empty) Reset() { *x = Empty{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_empty_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_empty_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Empty) String() string { @@ -70,7 +68,7 @@ func (*Empty) ProtoMessage() {} func (x *Empty) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_empty_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -131,20 +129,6 @@ func file_google_protobuf_empty_proto_init() { if File_google_protobuf_empty_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_empty_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*Empty); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/known/fieldmaskpb/field_mask.pb.go b/vendor/google.golang.org/protobuf/types/known/fieldmaskpb/field_mask.pb.go index ac1e91bb6dd..19de8d371fd 100644 --- a/vendor/google.golang.org/protobuf/types/known/fieldmaskpb/field_mask.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/fieldmaskpb/field_mask.pb.go @@ -467,11 +467,9 @@ func rangeFields(path string, f func(field string) bool) bool { func (x *FieldMask) Reset() { *x = FieldMask{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_field_mask_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_field_mask_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldMask) String() string { @@ -482,7 +480,7 @@ func (*FieldMask) ProtoMessage() {} func (x *FieldMask) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_field_mask_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -553,20 +551,6 @@ func file_google_protobuf_field_mask_proto_init() { if File_google_protobuf_field_mask_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_field_mask_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*FieldMask); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/google.golang.org/protobuf/types/known/timestamppb/timestamp.pb.go b/vendor/google.golang.org/protobuf/types/known/timestamppb/timestamp.pb.go index 83a5a645b08..0d20722d70b 100644 --- a/vendor/google.golang.org/protobuf/types/known/timestamppb/timestamp.pb.go +++ b/vendor/google.golang.org/protobuf/types/known/timestamppb/timestamp.pb.go @@ -254,11 +254,9 @@ func (x *Timestamp) check() uint { func (x *Timestamp) Reset() { *x = Timestamp{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_timestamp_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_timestamp_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Timestamp) String() string { @@ -269,7 +267,7 @@ func (*Timestamp) ProtoMessage() {} func (x *Timestamp) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_timestamp_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -348,20 +346,6 @@ func file_google_protobuf_timestamp_proto_init() { if File_google_protobuf_timestamp_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_timestamp_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*Timestamp); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/vendor/modules.txt b/vendor/modules.txt index 41585487710..43b19c38d61 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -326,6 +326,12 @@ github.com/docker/go-units # github.com/dustin/go-humanize v1.0.1 ## explicit; go 1.16 github.com/dustin/go-humanize +# github.com/ebitengine/purego v0.8.0 +## explicit; go 1.18 +github.com/ebitengine/purego +github.com/ebitengine/purego/internal/cgo +github.com/ebitengine/purego/internal/fakecgo +github.com/ebitengine/purego/internal/strings # github.com/edsrzf/mmap-go v1.1.0 ## explicit; go 1.17 github.com/edsrzf/mmap-go @@ -778,7 +784,7 @@ github.com/json-iterator/go # github.com/julienschmidt/httprouter v1.3.0 ## explicit; go 1.7 github.com/julienschmidt/httprouter -# github.com/klauspost/compress v1.17.10 +# github.com/klauspost/compress v1.17.11 ## explicit; go 1.21 github.com/klauspost/compress github.com/klauspost/compress/flate @@ -964,7 +970,7 @@ github.com/prometheus/alertmanager/timeinterval github.com/prometheus/alertmanager/tracing github.com/prometheus/alertmanager/types github.com/prometheus/alertmanager/ui -# github.com/prometheus/client_golang v1.20.4 +# github.com/prometheus/client_golang v1.20.5 ## explicit; go 1.20 github.com/prometheus/client_golang/api github.com/prometheus/client_golang/api/prometheus/v1 @@ -1001,7 +1007,7 @@ github.com/prometheus/exporter-toolkit/web github.com/prometheus/procfs github.com/prometheus/procfs/internal/fs github.com/prometheus/procfs/internal/util -# github.com/prometheus/prometheus v1.99.0 => github.com/grafana/mimir-prometheus v0.0.0-20241015102654-eaa7eae2d877 +# github.com/prometheus/prometheus v1.99.0 => github.com/grafana/mimir-prometheus v0.0.0-20241016140351-5e9a771537ba ## explicit; go 1.22.0 github.com/prometheus/prometheus/config github.com/prometheus/prometheus/discovery @@ -1082,7 +1088,7 @@ github.com/segmentio/fasthash/fnv1a # github.com/sercand/kuberesolver/v5 v5.1.1 ## explicit; go 1.18 github.com/sercand/kuberesolver/v5 -# github.com/shirou/gopsutil/v4 v4.24.8 +# github.com/shirou/gopsutil/v4 v4.24.9 ## explicit; go 1.18 github.com/shirou/gopsutil/v4/common github.com/shirou/gopsutil/v4/cpu @@ -1090,9 +1096,6 @@ github.com/shirou/gopsutil/v4/internal/common github.com/shirou/gopsutil/v4/mem github.com/shirou/gopsutil/v4/net github.com/shirou/gopsutil/v4/process -# github.com/shoenig/go-m1cpu v0.1.6 -## explicit; go 1.20 -github.com/shoenig/go-m1cpu # github.com/shopspring/decimal v1.2.0 ## explicit; go 1.13 github.com/shopspring/decimal @@ -1469,7 +1472,7 @@ google.golang.org/genproto/googleapis/type/expr ## explicit; go 1.21 google.golang.org/genproto/googleapis/api google.golang.org/genproto/googleapis/api/annotations -# google.golang.org/genproto/googleapis/rpc v0.0.0-20240930140551-af27646dc61f +# google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 ## explicit; go 1.21 google.golang.org/genproto/googleapis/rpc/code google.golang.org/genproto/googleapis/rpc/errdetails @@ -1544,8 +1547,8 @@ google.golang.org/grpc/stats google.golang.org/grpc/status google.golang.org/grpc/tap google.golang.org/grpc/test/bufconn -# google.golang.org/protobuf v1.34.2 -## explicit; go 1.20 +# google.golang.org/protobuf v1.35.1 +## explicit; go 1.21 google.golang.org/protobuf/encoding/protodelim google.golang.org/protobuf/encoding/protojson google.golang.org/protobuf/encoding/prototext @@ -1668,7 +1671,7 @@ sigs.k8s.io/kustomize/kyaml/yaml/walk sigs.k8s.io/yaml sigs.k8s.io/yaml/goyaml.v2 sigs.k8s.io/yaml/goyaml.v3 -# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20241015102654-eaa7eae2d877 +# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20241016140351-5e9a771537ba # github.com/hashicorp/memberlist => github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe # gopkg.in/yaml.v3 => github.com/colega/go-yaml-yaml v0.0.0-20220720105220-255a8d16d094 # github.com/grafana/regexp => github.com/grafana/regexp v0.0.0-20240531075221-3685f1377d7b