Skip cleaning for search by name scrape queries (#2059)

* Skip pp for search by name queries
* upgrade htmlquery
This commit is contained in:
bnkai 2021-12-16 02:18:39 +02:00 committed by GitHub
parent 439c338049
commit 66dd239732
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
34 changed files with 10925 additions and 10665 deletions

10
go.mod
View file

@ -4,7 +4,7 @@ require (
github.com/99designs/gqlgen v0.12.2
github.com/Yamashou/gqlgenc v0.0.0-20200902035953-4dbef3551953
github.com/anacrolix/dms v1.2.2
github.com/antchfx/htmlquery v1.2.3
github.com/antchfx/htmlquery v1.2.5-0.20211125074323-810ee8082758
github.com/chromedp/cdproto v0.0.0-20210622022015-fe1827b46b84
github.com/chromedp/chromedp v0.7.3
github.com/corona10/goimagehash v1.0.3
@ -37,10 +37,10 @@ require (
github.com/vektra/mockery/v2 v2.2.1
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5
golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb
golang.org/x/net v0.0.0-20210520170846-37e1c6afe023
golang.org/x/net v0.0.0-20211123203042-d83791d6bcd9
golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b // indirect
golang.org/x/text v0.3.6
golang.org/x/text v0.3.7
golang.org/x/tools v0.1.5 // indirect
gopkg.in/sourcemap.v1 v1.0.5 // indirect
gopkg.in/yaml.v2 v2.4.0
@ -54,7 +54,7 @@ require (
require (
github.com/agnivade/levenshtein v1.1.0 // indirect
github.com/antchfx/xpath v1.1.6 // indirect
github.com/antchfx/xpath v1.2.0 // indirect
github.com/chromedp/sysutil v1.0.0 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
@ -62,7 +62,7 @@ require (
github.com/gobwas/httphead v0.1.0 // indirect
github.com/gobwas/pool v0.2.1 // indirect
github.com/gobwas/ws v1.1.0-rc.5 // indirect
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/hashicorp/go-multierror v1.1.0 // indirect
github.com/hashicorp/golang-lru v0.5.1 // indirect

17
go.sum
View file

@ -81,10 +81,10 @@ github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNg
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
github.com/andybalholm/brotli v1.0.3 h1:fpcw+r1N1h0Poc1F/pHbW40cUm/lMEQslZtCkBQ0UnM=
github.com/andybalholm/brotli v1.0.3/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M=
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0=
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/antchfx/htmlquery v1.2.5-0.20211125074323-810ee8082758 h1:Ldjwcl7T8VqCKgQQ0TfPI8fNb8O/GtMXcYaHlqOu99s=
github.com/antchfx/htmlquery v1.2.5-0.20211125074323-810ee8082758/go.mod h1:2xO6iu3EVWs7R2JYqBbp8YzG50gj/ofqs5/0VZoDZLc=
github.com/antchfx/xpath v1.2.0 h1:mbwv7co+x0RwgeGAOHdrKy89GvHaGvxxBtPK0uF9Zr8=
github.com/antchfx/xpath v1.2.0/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/apache/arrow/go/arrow v0.0.0-20200601151325-b2287a20f230/go.mod h1:QNYViu/X0HXDHw7m3KXzWSVXIbfUvJqBFe6Gj8/pYA0=
github.com/apache/arrow/go/arrow v0.0.0-20210521153258-78c88a9f517b/go.mod h1:R4hW3Ug0s+n4CUsWHKOj00Pu01ZqU4x/hSF5kXUcXKQ=
@ -272,8 +272,9 @@ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfU
github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
@ -871,8 +872,9 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v
golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210520170846-37e1c6afe023 h1:ADo5wSpq2gqaCGQWzk7S5vd//0iyyLeAratkEoG5dLE=
golang.org/x/net v0.0.0-20210520170846-37e1c6afe023/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20211123203042-d83791d6bcd9 h1:0qxwC5n+ttVOINCBeRHO0nq9X7uy8SDsPoi5OaCdIEI=
golang.org/x/net v0.0.0-20211123203042-d83791d6bcd9/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/oauth2 v0.0.0-20180227000427-d7d64896b5ff/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181106182150-f42d05182288/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
@ -994,8 +996,9 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=

View file

@ -119,6 +119,7 @@ func (s *jsonScraper) scrapeByName(ctx context.Context, name string, ty models.S
}
q := s.getJsonQuery(doc)
q.setType(SearchQuery)
var content []models.ScrapedContent
switch ty {
@ -242,6 +243,15 @@ func (s *jsonScraper) getJsonQuery(doc string) *jsonQuery {
// jsonQuery is the query object used by the JSON scraper. It carries the JSON
// document being queried, a reference to the jsonScraper it belongs to and an
// optional query type.
type jsonQuery struct {
doc string
scraper *jsonScraper
// queryType is the zero value unless setType has been called; scrapeByName
// sets it to SearchQuery.
queryType QueryType
}
// getType returns the query type previously stored with setType.
func (q *jsonQuery) getType() QueryType {
return q.queryType
}
// setType records the kind of query being run so that later processing can
// be adjusted per query type.
func (q *jsonQuery) setType(t QueryType) {
q.queryType = t
}
func (q *jsonQuery) runQuery(selector string) ([]string, error) {

View file

@ -19,6 +19,8 @@ import (
// mappedQuery is the abstraction the mapped scraper configuration works
// against; it is implemented by the JSON and XPath query types.
type mappedQuery interface {
// runQuery evaluates selector and returns the matched values as strings.
runQuery(selector string) ([]string, error)
// getType reports the query type set with setType (zero value if unset).
getType() QueryType
// setType tags the query with a type, e.g. SearchQuery for search-by-name.
setType(QueryType)
// subScrape returns a new query derived from value.
// NOTE(review): presumably value is a URL that gets loaded — confirm in the implementations.
subScrape(ctx context.Context, value string) mappedQuery
}
@ -77,6 +79,10 @@ func (s mappedConfig) postProcess(ctx context.Context, q mappedQuery, attrConfig
result = attrConfig.postProcess(ctx, result, q)
if attrConfig.hasSplit() {
results := attrConfig.splitString(result)
// skip cleaning when the query is used for searching
if q.getType() == SearchQuery {
return results
}
results = attrConfig.cleanResults(results)
return results
}
@ -91,7 +97,12 @@ func (s mappedConfig) postProcess(ctx context.Context, q mappedQuery, attrConfig
ret = append(ret, text)
}
// skip cleaning when the query is used for searching
if q.getType() == SearchQuery {
return ret
}
ret = attrConfig.cleanResults(ret)
}
return ret

View file

@ -29,6 +29,15 @@ type Input struct {
Gallery *models.ScrapedGalleryInput
}
// QueryType is a small tag attached to a query so that processing steps can
// behave differently depending on what the query is used for.
type QueryType int

const (
	// The zero value is deliberately left unnamed: a query whose type was
	// never set does not compare equal to any named QueryType.
	_ QueryType = iota

	// SearchQuery marks a query issued for a search; it is the only type
	// needed for now.
	SearchQuery
)
// scraper is the generic interface to the scraper subsystems
type scraper interface {
// spec returns the scraper specification, suitable for graphql

View file

@ -100,6 +100,7 @@ func (s *xpathScraper) scrapeByName(ctx context.Context, name string, ty models.
}
q := s.getXPathQuery(doc)
q.setType(SearchQuery)
var content []models.ScrapedContent
switch ty {
@ -240,6 +241,15 @@ func (s *xpathScraper) getXPathQuery(doc *html.Node) *xpathQuery {
// xpathQuery is the query object used by the XPath scraper. It carries the
// parsed HTML document being queried, a reference to the xpathScraper it
// belongs to and an optional query type.
type xpathQuery struct {
doc *html.Node
scraper *xpathScraper
// queryType is the zero value unless setType has been called; scrapeByName
// sets it to SearchQuery.
queryType QueryType
}
// getType returns the query type previously stored with setType.
func (q *xpathQuery) getType() QueryType {
return q.queryType
}
// setType records the kind of query being run so that later processing can
// be adjusted per query type.
func (q *xpathQuery) setType(t QueryType) {
q.queryType = t
}
func (q *xpathQuery) runQuery(selector string) ([]string, error) {

View file

@ -12,6 +12,16 @@ Overview
`htmlquery` has a built-in query object cache based on [LRU](https://godoc.org/github.com/golang/groupcache/lru) that caches recently used XPath query strings. Enabling query caching avoids recompiling the XPath expression on each query.
You can visit this page to learn about the supported XPath(1.0/2.0) syntax. https://github.com/antchfx/xpath
XPath query packages for Go
===
| Name | Description |
| ------------------------------------------------- | ----------------------------------------- |
| [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document |
| [xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document |
| [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document |
Installation
====
@ -60,15 +70,15 @@ list := htmlquery.Find(doc, "//a")
#### Find all A elements that have `href` attribute.
```go
list := range htmlquery.Find(doc, "//a[@href]")
list := htmlquery.Find(doc, "//a[@href]")
```
#### Find all A elements with `href` attribute and only return `href` value.
```go
list := range htmlquery.Find(doc, "//a/@href")
for n := range list{
fmt.Println(htmlquery.InnerText(n)) // output @href value without A element.
list := htmlquery.Find(doc, "//a/@href")
for _ , n := range list{
fmt.Println(htmlquery.SelectAttr(n, "href")) // output @href value
}
```
@ -78,6 +88,13 @@ for n := range list{
a := htmlquery.FindOne(doc, "//a[3]")
```
### Find the child element (img) under an A element and print its source
```go
a := htmlquery.FindOne(doc, "//a")
img := htmlquery.FindOne(a, "//img")
fmt.Println(htmlquery.SelectAttr(img, "src")) // output @src value
```
#### Evaluate the number of all IMG element.
```go
@ -87,6 +104,30 @@ fmt.Printf("total count is %f", v)
```
Quick Starts
===
```go
func main() {
doc, err := htmlquery.LoadURL("https://www.bing.com/search?q=golang")
if err != nil {
panic(err)
}
// Find all news items.
list, err := htmlquery.QueryAll(doc, "//ol/li")
if err != nil {
panic(err)
}
for i, n := range list {
a := htmlquery.FindOne(n, "//a")
if a != nil {
fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href"))
}
}
}
```
FAQ
====
@ -117,52 +158,6 @@ BenchmarkDisableSelectorCache-4 500000 3162 ns/op
htmlquery.DisableSelectorCache = true
```
Changelogs
===
2019-11-19
- Add built-in query object cache feature, avoid re-compilation for the same query string. [#16](https://github.com/antchfx/htmlquery/issues/16)
- Added LoadDoc [18](https://github.com/antchfx/htmlquery/pull/18)
2019-10-05
- Add new methods that compatible with invalid XPath expression error: `QueryAll` and `Query`.
- Add `QuerySelector` and `QuerySelectorAll` methods, supported reused your query object.
2019-02-04
- [#7](https://github.com/antchfx/htmlquery/issues/7) Removed deprecated `FindEach()` and `FindEachWithBreak()` methods.
2018-12-28
- Avoid adding duplicate elements to list for `Find()` method. [#6](https://github.com/antchfx/htmlquery/issues/6)
Tutorial
===
```go
func main() {
doc, err := htmlquery.LoadURL("https://www.bing.com/search?q=golang")
if err != nil {
panic(err)
}
// Find all news item.
list, err := htmlquery.QueryAll(doc, "//ol/li")
if err != nil {
panic(err)
}
for i, n := range list {
a := htmlquery.FindOne(n, "//a")
fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href"))
}
}
```
List of supported XPath query packages
===
| Name | Description |
| ------------------------------------------------- | ----------------------------------------- |
| [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document |
| [xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document |
| [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document |
Questions
===
Please let me know if you have any questions.

View file

@ -55,10 +55,10 @@ func QueryAll(top *html.Node, expr string) ([]*html.Node, error) {
return nodes, nil
}
// Query searches the html.Node that matches by the specified XPath expr,
// and return the first element of matched html.Node.
// Query runs the given XPath expression against the given html.Node and
// returns the first matching html.Node, or nil if no matches are found.
//
// Return an error if the expression `expr` cannot be parsed.
// Returns an error if the expression `expr` cannot be parsed.
func Query(top *html.Node, expr string) (*html.Node, error) {
exp, err := getQuery(expr)
if err != nil {
@ -83,11 +83,6 @@ func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node {
for t.MoveNext() {
nav := t.Current().(*NodeNavigator)
n := getCurrentNode(nav)
// avoid adding duplicate nodes.
if len(elems) > 0 && (elems[0] == n || (nav.NodeType() == xpath.AttributeNode &&
nav.LocalName() == elems[0].Data && nav.Value() == InnerText(elems[0]))) {
continue
}
elems = append(elems, n)
}
return elems
@ -179,6 +174,19 @@ func SelectAttr(n *html.Node, name string) (val string) {
return
}
// ExistsAttr reports whether node n carries an attribute whose key is exactly
// name. A nil node has no attributes, so the result is false.
func ExistsAttr(n *html.Node, name string) bool {
	if n != nil {
		for i := range n.Attr {
			if n.Attr[i].Key == name {
				return true
			}
		}
	}
	return false
}
// OutputHTML returns the text including tags name.
func OutputHTML(n *html.Node, self bool) string {
var buf bytes.Buffer

View file

@ -138,6 +138,7 @@ Supported Features
`lang()`| ✗ |
`last()`| ✓ |
`local-name()`| ✓ |
`matches()`| ✓ |
`name()`| ✓ |
`namespace-uri()`| ✓ |
`normalize-space()`| ✓ |

View file

@ -193,8 +193,23 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) {
if err != nil {
return nil, err
}
qyOutput = &functionQuery{Input: b.firstInput, Func: containsFunc(arg1, arg2)}
case "matches":
//matches(string , pattern)
if len(root.Args) != 2 {
return nil, errors.New("xpath: matches function must have two parameters")
}
var (
arg1, arg2 query
err error
)
if arg1, err = b.processNode(root.Args[0]); err != nil {
return nil, err
}
if arg2, err = b.processNode(root.Args[1]); err != nil {
return nil, err
}
qyOutput = &functionQuery{Input: b.firstInput, Func: matchesFunc(arg1, arg2)}
case "substring":
//substring( string , start [, length] )
if len(root.Args) < 2 {
@ -435,13 +450,15 @@ func (b *builder) processOperatorNode(root *operatorNode) (query, error) {
}
var qyOutput query
switch root.Op {
case "+", "-", "div", "mod": // Numeric operator
case "+", "-", "*", "div", "mod": // Numeric operator
var exprFunc func(interface{}, interface{}) interface{}
switch root.Op {
case "+":
exprFunc = plusFunc
case "-":
exprFunc = minusFunc
case "*":
exprFunc = mulFunc
case "div":
exprFunc = divFunc
case "mod":
@ -498,6 +515,12 @@ func (b *builder) processNode(root node) (q query, err error) {
q, err = b.processFunctionNode(root.(*functionNode))
case nodeOperator:
q, err = b.processOperatorNode(root.(*operatorNode))
case nodeGroup:
q, err = b.processNode(root.(*groupNode).Input)
if err != nil {
return
}
q = &groupQuery{Input: q}
}
return
}

80
vendor/github.com/antchfx/xpath/cache.go generated vendored Normal file
View file

@ -0,0 +1,80 @@
package xpath
import (
"regexp"
"sync"
)
// loadFunc produces the value for a key that is not present in the cache.
type loadFunc func(key interface{}) (interface{}, error)

// defaultCap is the capacity given to the package's default cache.
const defaultCap = 65536

// loadingCache is a minimal read-through cache: a miss invokes load and the
// result is remembered. When a bounded cache is full it is not evicted entry
// by entry; the whole map is discarded and restarted with the new entry.
//
// A hand-rolled cache is used instead of an LRU package such as
// github.com/hashicorp/golang-lru because this library deliberately has no
// external dependencies and supports old Go releases (1.6, 1.9, 1.10 and
// later). defaultCap is expected to be large enough for long-running services
// whose set of distinct keys is small; in the rare case the capacity is
// reached, the reset costs a few reloads, which is cheap once amortized and
// simpler than maintaining an LRU structure.
type loadingCache struct {
	sync.RWMutex
	cap   int
	load  loadFunc
	m     map[interface{}]interface{}
	reset int
}

// NewLoadingCache returns an empty loading cache that uses load to fill
// misses. capacity must be >= 0, otherwise NewLoadingCache panics; a capacity
// of 0 lets the cache grow without bound.
func NewLoadingCache(load loadFunc, capacity int) *loadingCache {
	if capacity < 0 {
		panic("capacity must be >= 0")
	}
	c := new(loadingCache)
	c.cap = capacity
	c.load = load
	c.m = make(map[interface{}]interface{})
	return c
}

// get returns the cached value for key, loading and storing it on a miss.
// A load error is returned as-is and nothing is stored for that key.
func (c *loadingCache) get(key interface{}) (interface{}, error) {
	if cached, ok := c.lookup(key); ok {
		return cached, nil
	}
	// The load runs without holding any lock.
	loaded, err := c.load(key)
	if err != nil {
		return nil, err
	}
	c.store(key, loaded)
	return loaded, nil
}

// lookup reads key from the map under the read lock.
func (c *loadingCache) lookup(key interface{}) (interface{}, bool) {
	c.RLock()
	value, ok := c.m[key]
	c.RUnlock()
	return value, ok
}

// store writes key/value under the write lock. If the cache is bounded and
// already at capacity, the map is replaced by one holding only the new entry
// and the reset counter is incremented.
func (c *loadingCache) store(key, value interface{}) {
	c.Lock()
	if full := c.cap > 0 && len(c.m) >= c.cap; full {
		c.m = map[interface{}]interface{}{key: value}
		c.reset++
	} else {
		c.m[key] = value
	}
	c.Unlock()
}
var (
// RegexpCache is a loading cache for string -> *regexp.Regexp mapping. It is exported so that in rare cases
// client can customize load func and/or capacity.
RegexpCache = defaultRegexpCache()
)
// defaultRegexpCache builds the cache behind RegexpCache: keys are pattern
// strings, values are the *regexp.Regexp compiled from them, and the capacity
// is defaultCap.
func defaultRegexpCache() *loadingCache {
	compile := func(pattern interface{}) (interface{}, error) {
		return regexp.Compile(pattern.(string))
	}
	return NewLoadingCache(compile, defaultCap)
}
// getRegexp returns the compiled form of pattern, going through RegexpCache
// so that a pattern already held by the cache is not compiled again. The
// error from the cache's load function is returned unchanged.
func getRegexp(pattern string) (*regexp.Regexp, error) {
	cached, err := RegexpCache.get(pattern)
	if err == nil {
		return cached.(*regexp.Regexp), nil
	}
	return nil, err
}

View file

@ -4,11 +4,26 @@ import (
"errors"
"fmt"
"math"
"regexp"
"strconv"
"strings"
"sync"
"unicode"
)
// stringBuilder is the subset of methods shared by strings.Builder
// (Go 1.10+) and bytes.Buffer (older Go versions), so either can be used
// as the underlying builder depending on the Go version.
type stringBuilder interface {
WriteRune(r rune) (n int, err error)
WriteString(s string) (int, error)
Reset()
Grow(n int)
String() string
}
// builderPool hands out reusable stringBuilder values; new ones are created
// through newStringBuilder when the pool is empty.
var builderPool = sync.Pool{New: func() interface{} {
return newStringBuilder()
}}
// The XPath function list.
func predicate(q query) func(NodeNavigator) bool {
@ -25,7 +40,7 @@ func predicate(q query) func(NodeNavigator) bool {
func positionFunc(q query, t iterator) interface{} {
var (
count = 1
node = t.Current()
node = t.Current().Copy()
)
test := predicate(q)
for node.MoveToPrevious() {
@ -40,7 +55,7 @@ func positionFunc(q query, t iterator) interface{} {
func lastFunc(q query, t iterator) interface{} {
var (
count = 0
node = t.Current()
node = t.Current().Copy()
)
node.MoveToFirst()
test := predicate(q)
@ -58,6 +73,7 @@ func lastFunc(q query, t iterator) interface{} {
// countFunc is a XPath Node Set functions count(node-set).
func countFunc(q query, t iterator) interface{} {
var count = 0
q = functionArgs(q)
test := predicate(q)
switch typ := q.Evaluate(t).(type) {
case query:
@ -73,7 +89,7 @@ func countFunc(q query, t iterator) interface{} {
// sumFunc is a XPath Node Set functions sum(node-set).
func sumFunc(q query, t iterator) interface{} {
var sum float64
switch typ := q.Evaluate(t).(type) {
switch typ := functionArgs(q).Evaluate(t).(type) {
case query:
for node := typ.Select(t); node != nil; node = typ.Select(t) {
if v, err := strconv.ParseFloat(node.Value(), 64); err == nil {
@ -116,19 +132,19 @@ func asNumber(t iterator, o interface{}) float64 {
// ceilingFunc is a XPath Node Set functions ceiling(node-set).
func ceilingFunc(q query, t iterator) interface{} {
val := asNumber(t, q.Evaluate(t))
val := asNumber(t, functionArgs(q).Evaluate(t))
return math.Ceil(val)
}
// floorFunc is a XPath Node Set functions floor(node-set).
func floorFunc(q query, t iterator) interface{} {
val := asNumber(t, q.Evaluate(t))
val := asNumber(t, functionArgs(q).Evaluate(t))
return math.Floor(val)
}
// roundFunc is a XPath Node Set functions round(node-set).
func roundFunc(q query, t iterator) interface{} {
val := asNumber(t, q.Evaluate(t))
val := asNumber(t, functionArgs(q).Evaluate(t))
//return math.Round(val)
return round(val)
}
@ -140,7 +156,7 @@ func nameFunc(arg query) func(query, iterator) interface{} {
if arg == nil {
v = t.Current()
} else {
v = arg.Select(t)
v = arg.Clone().Select(t)
if v == nil {
return ""
}
@ -160,7 +176,7 @@ func localNameFunc(arg query) func(query, iterator) interface{} {
if arg == nil {
v = t.Current()
} else {
v = arg.Select(t)
v = arg.Clone().Select(t)
if v == nil {
return ""
}
@ -177,7 +193,7 @@ func namespaceFunc(arg query) func(query, iterator) interface{} {
v = t.Current()
} else {
// Get the first node in the node-set if specified.
v = arg.Select(t)
v = arg.Clone().Select(t)
if v == nil {
return ""
}
@ -201,7 +217,7 @@ func asBool(t iterator, v interface{}) bool {
case *NodeIterator:
return v.MoveNext()
case bool:
return bool(v)
return v
case float64:
return v != 0
case string:
@ -239,19 +255,19 @@ func asString(t iterator, v interface{}) string {
// booleanFunc is a XPath functions boolean([node-set]).
func booleanFunc(q query, t iterator) interface{} {
v := q.Evaluate(t)
v := functionArgs(q).Evaluate(t)
return asBool(t, v)
}
// numberFunc is a XPath functions number([node-set]).
func numberFunc(q query, t iterator) interface{} {
v := q.Evaluate(t)
v := functionArgs(q).Evaluate(t)
return asNumber(t, v)
}
// stringFunc is a XPath functions string([node-set]).
func stringFunc(q query, t iterator) interface{} {
v := q.Evaluate(t)
v := functionArgs(q).Evaluate(t)
return asString(t, v)
}
@ -338,15 +354,39 @@ func containsFunc(arg1, arg2 query) func(query, iterator) interface{} {
}
}
var (
regnewline = regexp.MustCompile(`[\r\n\t]`)
regseqspace = regexp.MustCompile(`\s{2,}`)
)
// matchesFunc is an XPath function that tests a given string against a regexp pattern.
// Note: does not support https://www.w3.org/TR/xpath-functions-31/#func-matches 3rd optional `flags` argument; if
// needed, directly put flags in the regexp pattern, such as `(?i)^pattern$` for `i` flag.
//
// arg1 is the input (a string or a node-set, of which the first node's value
// is used) and arg2 is the pattern, which must evaluate to a string. The
// returned function always yields a bool. It panics if the pattern is not a
// string or is not a valid regular expression.
func matchesFunc(arg1, arg2 query) func(query, iterator) interface{} {
	return func(q query, t iterator) interface{} {
		// s stays "" when the input selects no node: fn:matches interprets an
		// empty sequence as the zero-length string. Returning early with a
		// non-bool value here would also make not(matches(...)) misbehave.
		var s string
		switch typ := functionArgs(arg1).Evaluate(t).(type) {
		case string:
			s = typ
		case query:
			if node := typ.Select(t); node != nil {
				s = node.Value()
			}
		}

		pattern, ok := functionArgs(arg2).Evaluate(t).(string)
		if !ok {
			panic(errors.New("matches() function second argument type must be string"))
		}
		// Compiled patterns come from the shared regexp cache.
		re, err := getRegexp(pattern)
		if err != nil {
			panic(fmt.Errorf("matches() function second argument is not a valid regexp pattern, err: %s", err.Error()))
		}
		return re.MatchString(s)
	}
}
// normalizespaceFunc is XPath functions normalize-space(string?)
func normalizespaceFunc(q query, t iterator) interface{} {
var m string
switch typ := q.Evaluate(t).(type) {
switch typ := functionArgs(q).Evaluate(t).(type) {
case string:
m = typ
case query:
@ -356,10 +396,26 @@ func normalizespaceFunc(q query, t iterator) interface{} {
}
m = node.Value()
}
m = strings.TrimSpace(m)
m = regnewline.ReplaceAllString(m, " ")
m = regseqspace.ReplaceAllString(m, " ")
return m
var b = builderPool.Get().(stringBuilder)
b.Grow(len(m))
runeStr := []rune(strings.TrimSpace(m))
l := len(runeStr)
for i := range runeStr {
r := runeStr[i]
isSpace := unicode.IsSpace(r)
if !(isSpace && (i+1 < l && unicode.IsSpace(runeStr[i+1]))) {
if isSpace {
r = ' '
}
b.WriteRune(r)
}
}
result := b.String()
b.Reset()
builderPool.Put(b)
return result
}
// substringFunc is XPath functions substring function returns a part of a given string.
@ -466,7 +522,7 @@ func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
src := asString(t, functionArgs(arg2).Evaluate(t))
dst := asString(t, functionArgs(arg3).Evaluate(t))
var replace []string
replace := make([]string, 0, len(src))
for i, s := range src {
d := ""
if i < len(dst) {
@ -491,7 +547,7 @@ func replaceFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
// notFunc is XPATH functions not(expression) function operation.
func notFunc(q query, t iterator) interface{} {
switch v := q.Evaluate(t).(type) {
switch v := functionArgs(q).Evaluate(t).(type) {
case bool:
return !v
case query:
@ -507,20 +563,25 @@ func notFunc(q query, t iterator) interface{} {
// concat( string1 , string2 [, stringn]* )
func concatFunc(args ...query) func(query, iterator) interface{} {
return func(q query, t iterator) interface{} {
var a []string
b := builderPool.Get().(stringBuilder)
for _, v := range args {
v = functionArgs(v)
switch v := v.Evaluate(t).(type) {
case string:
a = append(a, v)
b.WriteString(v)
case query:
node := v.Select(t)
if node != nil {
a = append(a, node.Value())
b.WriteString(node.Value())
}
}
}
return strings.Join(a, "")
result := b.String()
b.Reset()
builderPool.Put(b)
return result
}
}

View file

@ -2,8 +2,15 @@
package xpath
import "math"
import (
"math"
"strings"
)
func round(f float64) int {
return int(math.Round(f))
}
func newStringBuilder() stringBuilder{
return &strings.Builder{}
}

View file

@ -2,7 +2,10 @@
package xpath
import "math"
import (
"bytes"
"math"
)
// math.Round() is supported by Go 1.10+,
// This method just compatible for version <1.10.
@ -13,3 +16,7 @@ func round(f float64) int {
}
return int(f + math.Copysign(0.5, f))
}
func newStringBuilder() stringBuilder {
return &bytes.Buffer{}
}

View file

@ -65,6 +65,7 @@ const (
nodeOperator
nodeVariable
nodeConstantOperand
nodeGroup
)
type parser struct {
@ -104,6 +105,10 @@ func newFilterNode(n, m node) node {
return &filterNode{nodeType: nodeFilter, Input: n, Condition: m}
}
func newGroupNode(n node) node {
return &groupNode{nodeType: nodeGroup, Input: n}
}
// newRootNode returns a root node.
func newRootNode(s string) node {
return &rootNode{nodeType: nodeRoot, slash: s}
@ -492,6 +497,9 @@ func (p *parser) parsePrimaryExpr(n node) (opnd node) {
case itemLParens:
p.next()
opnd = p.parseExpression(n)
if opnd.Type() != nodeConstantOperand {
opnd = newGroupNode(opnd)
}
p.skipItem(itemRParens)
case itemName:
if p.r.canBeFunc && !isNodeType(p.r) {
@ -587,6 +595,16 @@ func (o *operandNode) String() string {
return fmt.Sprintf("%v", o.Val)
}
// groupNode wraps the expression node found inside a pair of parentheses.
type groupNode struct {
nodeType
Input node
}
// String returns the string form of the wrapped expression.
func (g *groupNode) String() string {
return fmt.Sprintf("%s", g.Input)
}
// filterNode holds a condition filter.
type filterNode struct {
nodeType

View file

@ -76,6 +76,7 @@ func (a *ancestorQuery) Select(t iterator) NodeNavigator {
return nil
}
first := true
node = node.Copy()
a.iterator = func() NodeNavigator {
if first && a.Self {
first = false
@ -668,6 +669,35 @@ func (c *constantQuery) Clone() query {
return c
}
// groupQuery is the query built for a grouped (parenthesized) expression.
// It forwards to the wrapped Input query and counts how many nodes it has
// handed out so far.
type groupQuery struct {
	posit int
	Input query
}

// Select returns a copy of the next node produced by the wrapped query, or
// nil once the wrapped query is exhausted. Each returned node advances the
// position counter by one.
func (g *groupQuery) Select(t iterator) NodeNavigator {
	node := g.Input.Select(t)
	if node == nil {
		return nil
	}
	g.posit++
	// Hand out a copy so callers cannot move the wrapped query's navigator.
	return node.Copy()
}

// Evaluate delegates to the wrapped query.
func (g *groupQuery) Evaluate(t iterator) interface{} {
	return g.Input.Evaluate(t)
}

// Clone returns a new groupQuery over the same Input with the position
// counter starting again at zero.
func (g *groupQuery) Clone() query {
	return &groupQuery{Input: g.Input}
}

// position reports how many nodes Select has returned so far.
func (g *groupQuery) position() int {
	return g.posit
}
// logicalQuery is an XPath logical expression.
type logicalQuery struct {
Left, Right query

View file

@ -0,0 +1,26 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
package socket
import (
"syscall"
)
// ioComplete decides, from the flags passed to a socket syscall and the error
// it returned, whether a syscall.RawConn.Read or Write callback is finished
// (true) or should be retried after waiting for I/O readiness (false).
func ioComplete(flags int, operr error) bool {
	// With MSG_DONTWAIT the caller asked not to wait, so the callback is
	// always done regardless of the error.
	dontWait := flags&syscall.MSG_DONTWAIT != 0
	// EAGAIN / EWOULDBLOCK mean no data was available yet.
	wouldBlock := operr == syscall.EAGAIN || operr == syscall.EWOULDBLOCK
	return dontWait || !wouldBlock
}

View file

@ -0,0 +1,22 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build aix || windows || zos
// +build aix windows zos
package socket
import (
"syscall"
)
// ioComplete decides, from the error a socket syscall returned, whether a
// syscall.RawConn.Read or Write callback is finished (true) or should be
// retried after waiting for I/O readiness (false). The flags argument is not
// consulted in this variant.
func ioComplete(flags int, operr error) bool {
	// EAGAIN / EWOULDBLOCK mean no data was available yet: not complete.
	return operr != syscall.EAGAIN && operr != syscall.EWOULDBLOCK
}

View file

@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build darwin && go1.12
// +build darwin,go1.12
// This exists solely so we can linkname in symbols from syscall.

View file

@ -10,7 +10,6 @@ package socket
import (
"net"
"os"
"syscall"
)
func (c *Conn) recvMsgs(ms []Message, flags int) (int, error) {
@ -28,10 +27,7 @@ func (c *Conn) recvMsgs(ms []Message, flags int) (int, error) {
var n int
fn := func(s uintptr) bool {
n, operr = recvmmsg(s, hs, flags)
if operr == syscall.EAGAIN {
return false
}
return true
return ioComplete(flags, operr)
}
if err := c.c.Read(fn); err != nil {
return n, err
@ -60,10 +56,7 @@ func (c *Conn) sendMsgs(ms []Message, flags int) (int, error) {
var n int
fn := func(s uintptr) bool {
n, operr = sendmmsg(s, hs, flags)
if operr == syscall.EAGAIN {
return false
}
return true
return ioComplete(flags, operr)
}
if err := c.c.Write(fn); err != nil {
return n, err

View file

@ -9,7 +9,6 @@ package socket
import (
"os"
"syscall"
)
func (c *Conn) recvMsg(m *Message, flags int) error {
@ -25,10 +24,7 @@ func (c *Conn) recvMsg(m *Message, flags int) error {
var n int
fn := func(s uintptr) bool {
n, operr = recvmsg(s, &h, flags)
if operr == syscall.EAGAIN || operr == syscall.EWOULDBLOCK {
return false
}
return true
return ioComplete(flags, operr)
}
if err := c.c.Read(fn); err != nil {
return err
@ -64,10 +60,7 @@ func (c *Conn) sendMsg(m *Message, flags int) error {
var n int
fn := func(s uintptr) bool {
n, operr = sendmsg(s, &h, flags)
if operr == syscall.EAGAIN || operr == syscall.EWOULDBLOCK {
return false
}
return true
return ioComplete(flags, operr)
}
if err := c.c.Write(fn); err != nil {
return err

View file

@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build aix || darwin || dragonfly || freebsd || openbsd
// +build aix darwin dragonfly freebsd openbsd
//go:build aix || darwin || dragonfly || freebsd || openbsd || solaris
// +build aix darwin dragonfly freebsd openbsd solaris
package socket

View file

@ -1,43 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build aix || (go1.12 && darwin)
// +build aix go1.12,darwin
package socket
import (
"syscall"
"unsafe"
)
// syscall_getsockopt is a body-less declaration; the go:linkname directive
// below binds it to the unexported syscall.getsockopt.
//
//go:linkname syscall_getsockopt syscall.getsockopt
func syscall_getsockopt(s int, level int, name int, val unsafe.Pointer, vallen *uint32) error
// getsockopt forwards a socket-option query for (level, name) on descriptor s
// to syscall.getsockopt, using b as the value buffer.
//
// It returns the length left in l after the call together with the call's
// error. b must be non-empty: &b[0] panics on a zero-length slice.
func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
// l starts as the buffer size and is passed by pointer, so the callee may
// overwrite it.
l := uint32(len(b))
err := syscall_getsockopt(int(s), level, name, unsafe.Pointer(&b[0]), &l)
return int(l), err
}
// syscall_setsockopt is a body-less declaration; the go:linkname directive
// below binds it to the unexported syscall.setsockopt.
//
//go:linkname syscall_setsockopt syscall.setsockopt
func syscall_setsockopt(s int, level int, name int, val unsafe.Pointer, vallen uintptr) error
// setsockopt forwards a socket-option update for (level, name) on descriptor s
// to syscall.setsockopt, passing b as the value and len(b) as its length.
//
// b must be non-empty: &b[0] panics on a zero-length slice.
func setsockopt(s uintptr, level, name int, b []byte) error {
return syscall_setsockopt(int(s), level, name, unsafe.Pointer(&b[0]), uintptr(len(b)))
}
// syscall_recvmsg is a body-less declaration; the go:linkname directive below
// binds it to the unexported syscall.recvmsg.
//
//go:linkname syscall_recvmsg syscall.recvmsg
func syscall_recvmsg(s int, msg *syscall.Msghdr, flags int) (n int, err error)
// recvmsg forwards to syscall.recvmsg for descriptor s and returns its
// (n, err) result unchanged.
//
// h is reinterpreted as *syscall.Msghdr through unsafe.Pointer.
// NOTE(review): this presumes msghdr has the same memory layout as
// syscall.Msghdr; confirm against the msghdr definition.
func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
return syscall_recvmsg(int(s), (*syscall.Msghdr)(unsafe.Pointer(h)), flags)
}
// syscall_sendmsg is a body-less declaration; the go:linkname directive below
// binds it to the unexported syscall.sendmsg.
//
//go:linkname syscall_sendmsg syscall.sendmsg
func syscall_sendmsg(s int, msg *syscall.Msghdr, flags int) (n int, err error)
// sendmsg forwards to syscall.sendmsg for descriptor s and returns its
// (n, err) result unchanged.
//
// h is reinterpreted as *syscall.Msghdr through unsafe.Pointer.
// NOTE(review): this presumes msghdr has the same memory layout as
// syscall.Msghdr; confirm against the msghdr definition.
func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
return syscall_sendmsg(int(s), (*syscall.Msghdr)(unsafe.Pointer(h)), flags)
}

View file

@ -10,10 +10,6 @@ import (
)
// Selector values handed to socketcall as its first (call) argument by the
// wrappers in this file.
// NOTE(review): presumably these mirror the kernel's socketcall(2) call
// numbers; confirm against the platform headers.
const (
sysSETSOCKOPT = 0xe
sysGETSOCKOPT = 0xf
sysSENDMSG = 0x10
sysRECVMSG = 0x11
sysRECVMMSG = 0x13
sysSENDMMSG = 0x14
)
@ -21,27 +17,6 @@ const (
// socketcall and rawsocketcall are declared without bodies, so their
// implementations live outside this Go file.
// Each takes a call selector plus six uintptr arguments and returns a result
// word and a syscall.Errno.
// NOTE(review): presumably both are assembly stubs; confirm in the matching .s file.
func socketcall(call, a0, a1, a2, a3, a4, a5 uintptr) (uintptr, syscall.Errno)
func rawsocketcall(call, a0, a1, a2, a3, a4, a5 uintptr) (uintptr, syscall.Errno)
// getsockopt issues socketcall with the sysGETSOCKOPT selector for option
// (level, name) on descriptor s, using b as the value buffer.
//
// It returns the length left in l after the call and the errno passed through
// errnoErr. b must be non-empty: &b[0] panics on a zero-length slice.
func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
// l starts as the buffer size; its address is handed to the call, which may
// overwrite it.
l := uint32(len(b))
// The uintptr(unsafe.Pointer(...)) conversions are written inline in the
// argument list; keep them there (see the unsafe.Pointer conversion rules).
_, errno := socketcall(sysGETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(unsafe.Pointer(&l)), 0)
return int(l), errnoErr(errno)
}
// setsockopt issues socketcall with the sysSETSOCKOPT selector for option
// (level, name) on descriptor s, passing b as the value and len(b) as its
// length. The errno is returned through errnoErr.
//
// b must be non-empty: &b[0] panics on a zero-length slice.
func setsockopt(s uintptr, level, name int, b []byte) error {
// The pointer-to-uintptr conversion is kept inline in the argument list.
_, errno := socketcall(sysSETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), 0)
return errnoErr(errno)
}
// recvmsg issues socketcall with the sysRECVMSG selector, passing descriptor
// s, the address of h and flags; the remaining argument slots are zero.
//
// It returns the call's result word as an int and the errno passed through
// errnoErr.
func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
n, errno := socketcall(sysRECVMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
return int(n), errnoErr(errno)
}
// sendmsg issues socketcall with the sysSENDMSG selector, passing descriptor
// s, the address of h and flags; the remaining argument slots are zero.
//
// It returns the call's result word as an int and the errno passed through
// errnoErr.
func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
n, errno := socketcall(sysSENDMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
return int(n), errnoErr(errno)
}
func recvmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
n, errno := socketcall(sysRECVMMSG, s, uintptr(unsafe.Pointer(&hs[0])), uintptr(len(hs)), uintptr(flags), 0, 0)
return int(n), errnoErr(errno)

View file

@ -10,10 +10,6 @@ import (
)
// Selector values handed to socketcall as its first (call) argument by the
// wrappers in this file.
// NOTE(review): presumably these mirror the kernel's socketcall(2) call
// numbers; confirm against the platform headers.
const (
sysSETSOCKOPT = 0xe
sysGETSOCKOPT = 0xf
sysSENDMSG = 0x10
sysRECVMSG = 0x11
sysRECVMMSG = 0x13
sysSENDMMSG = 0x14
)
@ -21,27 +17,6 @@ const (
// socketcall and rawsocketcall are declared without bodies, so their
// implementations live outside this Go file.
// Each takes a call selector plus six uintptr arguments and returns a result
// word and a syscall.Errno.
// NOTE(review): presumably both are assembly stubs; confirm in the matching .s file.
func socketcall(call, a0, a1, a2, a3, a4, a5 uintptr) (uintptr, syscall.Errno)
func rawsocketcall(call, a0, a1, a2, a3, a4, a5 uintptr) (uintptr, syscall.Errno)
// getsockopt issues socketcall with the sysGETSOCKOPT selector for option
// (level, name) on descriptor s, using b as the value buffer.
//
// It returns the length left in l after the call and the errno passed through
// errnoErr. b must be non-empty: &b[0] panics on a zero-length slice.
func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
// l starts as the buffer size; its address is handed to the call, which may
// overwrite it.
l := uint32(len(b))
// The uintptr(unsafe.Pointer(...)) conversions are written inline in the
// argument list; keep them there (see the unsafe.Pointer conversion rules).
_, errno := socketcall(sysGETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(unsafe.Pointer(&l)), 0)
return int(l), errnoErr(errno)
}
// setsockopt issues socketcall with the sysSETSOCKOPT selector for option
// (level, name) on descriptor s, passing b as the value and len(b) as its
// length. The errno is returned through errnoErr.
//
// b must be non-empty: &b[0] panics on a zero-length slice.
func setsockopt(s uintptr, level, name int, b []byte) error {
// The pointer-to-uintptr conversion is kept inline in the argument list.
_, errno := socketcall(sysSETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), 0)
return errnoErr(errno)
}
// recvmsg issues socketcall with the sysRECVMSG selector, passing descriptor
// s, the address of h and flags; the remaining argument slots are zero.
//
// It returns the call's result word as an int and the errno passed through
// errnoErr.
func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
n, errno := socketcall(sysRECVMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
return int(n), errnoErr(errno)
}
// sendmsg issues socketcall with the sysSENDMSG selector, passing descriptor
// s, the address of h and flags; the remaining argument slots are zero.
//
// It returns the call's result word as an int and the errno passed through
// errnoErr.
func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
n, errno := socketcall(sysSENDMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
return int(n), errnoErr(errno)
}
func recvmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
n, errno := socketcall(sysRECVMMSG, s, uintptr(unsafe.Pointer(&hs[0])), uintptr(len(hs)), uintptr(flags), 0, 0)
return int(n), errnoErr(errno)

View file

@ -1,59 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package socket
import (
"syscall"
"unsafe"
)
// The cgo_import_dynamic directives name four symbols to be resolved from
// "libsocket.so"; the linkname directives then bind each proc* variable below
// to one of those symbols. The wrappers in this file pass the address of the
// matching proc* variable as the trap argument of sysvicall6.
//go:cgo_import_dynamic libc___xnet_getsockopt __xnet_getsockopt "libsocket.so"
//go:cgo_import_dynamic libc_setsockopt setsockopt "libsocket.so"
//go:cgo_import_dynamic libc___xnet_recvmsg __xnet_recvmsg "libsocket.so"
//go:cgo_import_dynamic libc___xnet_sendmsg __xnet_sendmsg "libsocket.so"
//go:linkname procGetsockopt libc___xnet_getsockopt
//go:linkname procSetsockopt libc_setsockopt
//go:linkname procRecvmsg libc___xnet_recvmsg
//go:linkname procSendmsg libc___xnet_sendmsg
var (
procGetsockopt uintptr
procSetsockopt uintptr
procRecvmsg uintptr
procSendmsg uintptr
)
// sysvicall6 and rawSysvicall6 are declared without bodies, so their
// implementations live outside this Go file.
// Each takes a trap value, an argument count and six uintptr arguments, and
// returns two result words plus a syscall.Errno.
// NOTE(review): presumably provided by assembly trampolines that jump to the
// syscall package's functions of the same name; confirm in the accompanying .s file.
func sysvicall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (uintptr, uintptr, syscall.Errno)
func rawSysvicall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (uintptr, uintptr, syscall.Errno)
// getsockopt calls the procedure referenced by procGetsockopt through
// sysvicall6 with five arguments: descriptor s, level, name, the address of
// b's first byte and the address of the length l.
//
// It returns the length left in l after the call and the errno passed through
// errnoErr. b must be non-empty: &b[0] panics on a zero-length slice.
func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
// l starts as the buffer size; its address is handed to the call, which may
// overwrite it.
l := uint32(len(b))
// The uintptr(unsafe.Pointer(...)) conversions are written inline in the
// argument list; keep them there (see the unsafe.Pointer conversion rules).
_, _, errno := sysvicall6(uintptr(unsafe.Pointer(&procGetsockopt)), 5, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(unsafe.Pointer(&l)), 0)
return int(l), errnoErr(errno)
}
// setsockopt calls the procedure referenced by procSetsockopt through
// sysvicall6 with five arguments: descriptor s, level, name, the address of
// b's first byte and len(b). The errno is returned through errnoErr.
//
// b must be non-empty: &b[0] panics on a zero-length slice.
func setsockopt(s uintptr, level, name int, b []byte) error {
// The pointer-to-uintptr conversions are kept inline in the argument list.
_, _, errno := sysvicall6(uintptr(unsafe.Pointer(&procSetsockopt)), 5, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), 0)
return errnoErr(errno)
}
// recvmsg calls the procedure referenced by procRecvmsg through sysvicall6
// with three arguments: descriptor s, the address of h and flags.
//
// It returns the first result word as an int and the errno passed through
// errnoErr; the second result word is discarded.
func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
n, _, errno := sysvicall6(uintptr(unsafe.Pointer(&procRecvmsg)), 3, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
return int(n), errnoErr(errno)
}
// sendmsg calls the procedure referenced by procSendmsg through sysvicall6
// with three arguments: descriptor s, the address of h and flags.
//
// It returns the first result word as an int and the errno passed through
// errnoErr; the second result word is discarded.
func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
n, _, errno := sysvicall6(uintptr(unsafe.Pointer(&procSendmsg)), 3, s, uintptr(unsafe.Pointer(h)), uintptr(flags), 0, 0, 0)
return int(n), errnoErr(errno)
}
// recvmmsg is a stub in this file: it ignores its arguments and always
// returns 0 and errNotImplemented.
func recvmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
return 0, errNotImplemented
}
// sendmmsg is a stub in this file: it ignores its arguments and always
// returns 0 and errNotImplemented.
func sendmmsg(s uintptr, hs []mmsghdr, flags int) (int, error) {
return 0, errNotImplemented
}

View file

@ -1,11 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// ·sysvicall6 is a trampoline: it tail-jumps to syscall·sysvicall6 without
// touching its arguments. The frame is declared as $0-88: no locals and 88
// bytes of arguments plus results.
TEXT ·sysvicall6(SB),NOSPLIT,$0-88
JMP syscall·sysvicall6(SB)
// ·rawSysvicall6 is a trampoline: it tail-jumps to syscall·rawSysvicall6
// without touching its arguments. The frame is declared as $0-88: no locals
// and 88 bytes of arguments plus results.
TEXT ·rawSysvicall6(SB),NOSPLIT,$0-88
JMP syscall·rawSysvicall6(SB)

View file

@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build dragonfly || freebsd || (linux && !s390x && !386) || netbsd || openbsd
// +build dragonfly freebsd linux,!s390x,!386 netbsd openbsd
//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
package socket
@ -12,23 +12,32 @@ import (
"unsafe"
)
// The four declarations below have no bodies; each go:linkname directive binds
// the local name to the unexported function of the same purpose in package
// syscall (getsockopt, setsockopt, recvmsg, sendmsg).
//go:linkname syscall_getsockopt syscall.getsockopt
func syscall_getsockopt(s, level, name int, val unsafe.Pointer, vallen *uint32) error
//go:linkname syscall_setsockopt syscall.setsockopt
func syscall_setsockopt(s, level, name int, val unsafe.Pointer, vallen uintptr) error
//go:linkname syscall_recvmsg syscall.recvmsg
func syscall_recvmsg(s int, msg *syscall.Msghdr, flags int) (int, error)
//go:linkname syscall_sendmsg syscall.sendmsg
func syscall_sendmsg(s int, msg *syscall.Msghdr, flags int) (int, error)
func getsockopt(s uintptr, level, name int, b []byte) (int, error) {
l := uint32(len(b))
_, _, errno := syscall.Syscall6(syscall.SYS_GETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(unsafe.Pointer(&l)), 0)
return int(l), errnoErr(errno)
err := syscall_getsockopt(int(s), level, name, unsafe.Pointer(&b[0]), &l)
return int(l), err
}
func setsockopt(s uintptr, level, name int, b []byte) error {
_, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, s, uintptr(level), uintptr(name), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), 0)
return errnoErr(errno)
return syscall_setsockopt(int(s), level, name, unsafe.Pointer(&b[0]), uintptr(len(b)))
}
func recvmsg(s uintptr, h *msghdr, flags int) (int, error) {
n, _, errno := syscall.Syscall(syscall.SYS_RECVMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags))
return int(n), errnoErr(errno)
return syscall_recvmsg(int(s), (*syscall.Msghdr)(unsafe.Pointer(h)), flags)
}
func sendmsg(s uintptr, h *msghdr, flags int) (int, error) {
n, _, errno := syscall.Syscall(syscall.SYS_SENDMSG, s, uintptr(unsafe.Pointer(h)), uintptr(flags))
return int(n), errnoErr(errno)
return syscall_sendmsg(int(s), (*syscall.Msghdr)(unsafe.Pointer(h)), flags)
}

File diff suppressed because it is too large Load diff

View file

@ -251,6 +251,13 @@ func (t Tag) Parent() Tag {
// ParseExtension parses s as an extension and returns it on success.
func ParseExtension(s string) (ext string, err error) {
defer func() {
if recover() != nil {
ext = ""
err = ErrSyntax
}
}()
scan := makeScannerString(s)
var end int
if n := len(scan.token); n != 1 {
@ -461,7 +468,14 @@ func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) {
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier
// or another error if another error occurred.
func ParseBase(s string) (Language, error) {
func ParseBase(s string) (l Language, err error) {
defer func() {
if recover() != nil {
l = 0
err = ErrSyntax
}
}()
if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
@ -472,7 +486,14 @@ func ParseBase(s string) (Language, error) {
// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier
// or another error if another error occurred.
func ParseScript(s string) (Script, error) {
func ParseScript(s string) (scr Script, err error) {
defer func() {
if recover() != nil {
scr = 0
err = ErrSyntax
}
}()
if len(s) != 4 {
return 0, ErrSyntax
}
@ -489,7 +510,14 @@ func EncodeM49(r int) (Region, error) {
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns a ValueError if s is a well-formed but unknown region identifier
// or another error if another error occurred.
func ParseRegion(s string) (Region, error) {
func ParseRegion(s string) (r Region, err error) {
defer func() {
if recover() != nil {
r = 0
err = ErrSyntax
}
}()
if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
@ -578,7 +606,14 @@ type Variant struct {
// ParseVariant parses and returns a Variant. An error is returned if s is not
// a valid variant.
func ParseVariant(s string) (Variant, error) {
func ParseVariant(s string) (v Variant, err error) {
defer func() {
if recover() != nil {
v = Variant{}
err = ErrSyntax
}
}()
s = strings.ToLower(s)
if id, ok := variantIndex[s]; ok {
return Variant{id, s}, nil

View file

@ -232,6 +232,13 @@ func Parse(s string) (t Tag, err error) {
if s == "" {
return Und, ErrSyntax
}
defer func() {
if recover() != nil {
t = Und
err = ErrSyntax
return
}
}()
if len(s) <= maxAltTaglen {
b := [maxAltTaglen]byte{}
for i, c := range s {

View file

@ -43,6 +43,13 @@ func Parse(s string) (t Tag, err error) {
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the canonicalization type c.
func (c CanonType) Parse(s string) (t Tag, err error) {
defer func() {
if recover() != nil {
t = Tag{}
err = language.ErrSyntax
}
}()
tt, err := language.Parse(s)
if err != nil {
return makeTag(tt), err
@ -79,6 +86,13 @@ func Compose(part ...interface{}) (t Tag, err error) {
// tag is returned after canonicalizing using CanonType c. If one or more errors
// are encountered, one of the errors is returned.
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
defer func() {
if recover() != nil {
t = Tag{}
err = language.ErrSyntax
}
}()
var b language.Builder
if err = update(&b, part...); err != nil {
return und, err
@ -142,6 +156,14 @@ var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
// Tags with a weight of zero will be dropped. An error will be returned if the
// input could not be parsed.
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
defer func() {
if recover() != nil {
tag = nil
q = nil
err = language.ErrSyntax
}
}()
var entry string
for s != "" {
if entry, s = split(s, ','); entry == "" {

14
vendor/modules.txt vendored
View file

@ -44,11 +44,11 @@ github.com/anacrolix/dms/soap
github.com/anacrolix/dms/ssdp
github.com/anacrolix/dms/upnp
github.com/anacrolix/dms/upnpav
# github.com/antchfx/htmlquery v1.2.3
# github.com/antchfx/htmlquery v1.2.5-0.20211125074323-810ee8082758
## explicit; go 1.14
github.com/antchfx/htmlquery
# github.com/antchfx/xpath v1.1.6
## explicit
# github.com/antchfx/xpath v1.2.0
## explicit; go 1.14
github.com/antchfx/xpath
# github.com/chromedp/cdproto v0.0.0-20210622022015-fe1827b46b84
## explicit; go 1.14
@ -156,7 +156,7 @@ github.com/golang-migrate/migrate/v4/source
github.com/golang-migrate/migrate/v4/source/file
github.com/golang-migrate/migrate/v4/source/httpfs
github.com/golang-migrate/migrate/v4/source/iofs
# github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e
# github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da
## explicit
github.com/golang/groupcache/lru
# github.com/gorilla/securecookie v1.1.1
@ -388,7 +388,7 @@ golang.org/x/image/webp
## explicit; go 1.12
golang.org/x/mod/module
golang.org/x/mod/semver
# golang.org/x/net v0.0.0-20210520170846-37e1c6afe023
# golang.org/x/net v0.0.0-20211123203042-d83791d6bcd9
## explicit; go 1.17
golang.org/x/net/bpf
golang.org/x/net/context/ctxhttp
@ -410,8 +410,8 @@ golang.org/x/sys/windows
# golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b
## explicit; go 1.17
golang.org/x/term
# golang.org/x/text v0.3.6
## explicit; go 1.11
# golang.org/x/text v0.3.7
## explicit; go 1.17
golang.org/x/text/collate
golang.org/x/text/encoding
golang.org/x/text/encoding/charmap