From 4b53ea3a4e464252bf69da827f5db1d65a55a902 Mon Sep 17 00:00:00 2001 From: Henry Rich Date: Thu, 23 Jan 2025 17:46:45 -0500 Subject: [PATCH] Bug in complementary x { y mid-size x; remove pristine flags from result of ([-.-.); special case for list -. atom --- jsrc/vfrom.c | 6 +++--- jsrc/viavx.c | 36 +++++++++++++++++++++++------------- test/g520.ijs | 12 ++++++++++++ test/g631.ijs | 2 ++ 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/jsrc/vfrom.c b/jsrc/vfrom.c index 5f1a3ba77..257be53a4 100644 --- a/jsrc/vfrom.c +++ b/jsrc/vfrom.c @@ -40,7 +40,7 @@ DF1(jtcatalog){PROLOG(0072);A b,*wv,x,z,*zv;C*bu,*bv,**pv;I*cv,i,j,k,m=1,n,p,*qv struct __attribute__((aligned(CACHELINESIZE))) faxis { I lenaxis; // the length of the axes (including frame) represented by this faxis struct, in items I lencell; // size of item of this axis in atoms - I nsel; // number of selectors. If negative, axis is complementary and *sels is a bitmask, value is ~len + I nsel; // number of selectors. If negative, axis is complementary; nsel has ~(# of items to move to result) union { A ind; // (up until result allocation) the original block of selectors for this axis, for rank purposes, or 0 if none. I subx; // (for complementary axes only, during the copy) index of next value to skip @@ -205,8 +205,8 @@ static A jtaxisfrom(J jt,A w,struct faxis *axes,I rflags){F2PREFIP; I i; // complementary indexing. See if the values are consecutive (in-full is impossible) // The first index we produce is .sel0, which means that must be the index of the first gap. 
Find the width // of that gap and see if it accounts for all the indexes - nsel=~nsel; // convert nsel to positive length - gives number of surviving - index0=axes[r].sel0; I axn=axes[r].lenaxis; I indexn=likely(index0indexn-index0)goto novirtual; // if the first gap does not account for all the selectors, we can't make a virtual one // obsolete index0|=(indexn-index0)-nsel; // if more cells than gap, turn index0 neg to stop virtual // obsolete --indexn; // convert to end of gap diff --git a/jsrc/viavx.c b/jsrc/viavx.c index 58b8448f3..384f80aec 100644 --- a/jsrc/viavx.c +++ b/jsrc/viavx.c @@ -750,6 +750,7 @@ A jtindexofsub(J jt,I mode,AD * RESTRICT a,AD * RESTRICT w){F2PREFIP;PROLOG(0079 // it must not use sequential search if the comparison is inexact, because then it would conflict with nub and just generally fail because values not in // the nub could match later values leaving omitted values mode |= IIOREPS&((((((I)1)<cct)[1])>=(UI)((at)&(FL|CMPX|BOX)))<>mode); // remember if i./i:/e./key (and not prehash) /. is OK if a is not float + // *************************************** test for linear search ******************************************* // The comparison uses the fact that cct can never go above 1.0, which is 0x3ff0000000000000 in double precision. To avoid integer-float conversions, we just strip out the bit that signifies // 1.0. The expression then means 'tolerance=1.0 or intolerant comparison' // If the problem is small, use sequential search to save analysis and hashing time @@ -758,7 +759,7 @@ A jtindexofsub(J jt,I mode,AD * RESTRICT a,AD * RESTRICT w){F2PREFIP;PROLOG(0079 // For the hash lookup: ~10 clocks per atom of w // For the sequential search: 1/2 clock per a*w // units below are #atoms of sequential search per hash - I seqtime; DPMULD(an,wn,seqtime,seqtime=IMAX;); + I seqtime; DPMULD(MAX(an,NPAR*2),wn,seqtime,seqtime=IMAX;); // MAX to account for loop overhead in small a // TUNE From testing 5/2021 on SkylakeX. 
Hard to tell, since the data is brought into cache #define HASHTIMEA (5) // hashing time for a #define HASHTIMEW (4) // hashing time for w @@ -1246,14 +1247,22 @@ F2(jtless){A x=w;I ar,at,k,r,*s,wr,*ws; wr=AR(w); r=MAX(1,ar); I wn=AN(w); I wi,ai; SETIC(w,wi); SETIC(a,ai); if(unlikely(ar>1+wr))RCA(a); // if w's rank is smaller than that of a cell of a, nothing can be removed, return a if(unlikely(MIN(ai,wi)==0)&&(ar!=0))RCA(a); // if either arg has no items, there's nothing to remove, return a, unless atom must become a list - // if w's rank is larger than that of a cell of a, reheader w to look like a list of such cells - if(unlikely((-wr&-(r^wr))<0)){RZ(x=virtual(w,0,r)); AN(x)=wn; s=AS(x); ws=AS(w); k=ar>wr?0:1+wr-r; I s0; PRODX(s0,k,ws,1) s[0]=s0; MCISH(1+s,k+ws,r-1);} // use fauxvirtual here - // if nothing special (like sparse, or incompatible types, or x requires conversion) do the fast way; otherwise (-. x e. y) # x - // because LESS allocates a large array to hold all the values, we use the slower, less memory-intensive, version if a is mapped - RZ(x=(SGNIFSPARSE(at)|SGNIF(AFLAG(a),AFNJAX))>=0?jtindexofsub(jtinplace,ILESS,x,a): - repeat(not(eps(a,x)),a)); - // We extracted from a, so mark it (or its backer if virtual) non-pristine. If a was pristine and inplaceable, transfer its pristine status to the result - PRISTXFERAF(x,a) + if(ar==wr+1){ // is just 1 cell of y? + // if y has rank 1 less than x, execute as ((x ~: y) # x) if y is atomic or ((x -.@-:"yr) # x) if y is an array. Inplace x. 
Use IRS and leave comparison tolerance as set + J jtipx=(J)(((I)jtinplace&~(JTINPLACEA+JTINPLACEW))+(((I)jtinplace>>1)&JTINPLACEW)); // move input inplace-x flag to inplace-w + if(wr==0){RZ(x=jtrepeat(jtipx,ne(a,w),a)) // ((x ~: y) # x), inplaceable on the # + }else{IRS2(a,w,0,wr,wr,jtnotmatch,x); RZ(x=jtrepeat(jtipx,x,a)) // ((x -.@-:"yr) # x), inplaceable on the # + } + }else{ + // if w's rank is larger than that of a cell of a, reheader w to look like a list of such cells + if(unlikely((-wr&-(r^wr))<0)){RZ(x=virtual(w,0,r)); AN(x)=wn; s=AS(x); ws=AS(w); k=ar>wr?0:1+wr-r; I s0; PRODX(s0,k,ws,1) s[0]=s0; MCISH(1+s,k+ws,r-1);} // use fauxvirtual here + // if nothing special (like sparse, or incompatible types, or x requires conversion) do the fast way; otherwise (-. x e. y) # x + // because LESS allocates a large array to hold all the values, we use the slower, less memory-intensive, version if a is mapped + RZ(x=(SGNIFSPARSE(at)|SGNIF(AFLAG(a),AFNJAX))>=0?jtindexofsub(jtinplace,ILESS,x,a): + repeat(not(eps(a,x)),a)); + // We extracted from a, so mark it (or its backer if virtual) non-pristine. 
If a was pristine and inplaceable, transfer its pristine status to the result + } + if(unlikely(at&BOX))PRISTXFERAF(x,a) RETF(x); } /* a-.w */ @@ -1264,22 +1273,23 @@ DF2(jtintersect){A x=w;I ar,at,k,r,*s,wr,*ws; wr=AR(w); r=MAX(1,ar); I wn=AN(w); I wi,ai; SETIC(w,wi); SETIC(a,ai); if(unlikely(ar>1+wr))R take(zeroionei(0),a); // if w's rank is smaller than that of a cell of a, nothing can be common, return no items if(unlikely(MIN(ai,wi)==0))R take(zeroionei(0),a); // if either arg is empty, nothing can be common, return no items + // scaf consider fast version if y has only one cell // if w's rank is larger than that of a cell of a, reheader w to look like a list of such cells if(unlikely((-wr&-(r^wr))<0)){RZ(x=virtual(w,0,r)); AN(x)=wn; s=AS(x); ws=AS(w); k=ar>wr?0:1+wr-r; I s0; PRODX(s0,k,ws,1) s[0]=s0; MCISH(1+s,k+ws,r-1);} // use fauxvirtual here // comparison tolerance may be encoded in h - apply it if so D savcct = jt->cct; PUSHCCTIF(FAV(self)->localuse.lu1.cct,FAV(self)->localuse.lu1.cct!=0) // if there is a CT, use it - // if nothing special (like sparse, or incompatible types, or x requires conversion) do the fast way; otherwise (-. x e. y) # x + // if nothing special (like sparse, or incompatible types, or x requires conversion) do the fast way; otherwise (x e. y) # x // because LESS allocates a large array to hold all the values, we use the slower, less memory-intensive, version if a is mapped // Don't revert to fork! localuse.lu1.fork2hfn is not set x=(SGNIFSPARSE(at)|SGNIF(AFLAG(a),AFNJAX))>=0?jtindexofsub(jtinplace,IINTER,x,a): repeat(eps(a,x),a); POPCCT RZ(x); - // We extracted from a, so mark it (or its backer if virtual) non-pristine. If a was pristine and inplaceable, transfer its pristine status to the result - PRISTXFERAF(x,a) +// obsolete PRISTXFERAF(x,a) + if(unlikely(at&BOX)){PRISTCLRF(a) PRISTCLRF(w)} // result is non-pristine, and both a and w also RETF(x); -} /* a-.w */ +} // x e. 
y F2(jteps){I l,r; diff --git a/test/g520.ijs b/test/g520.ijs index 685984c35..af2bdd901 100644 --- a/test/g520.ijs +++ b/test/g520.ijs @@ -1610,6 +1610,18 @@ a =: 2 3 4$;:'+/..*(1 0 1#"2 a)-:(. ? 40 NB. deal the array and the comp indexes +assert. (2 ! shp) = (+/~.dels) + +/ (<< ckprist 0 1 1 ] 5 NB. scaf '36' ($,) ckprist 0 0 0 0 ] 35 ~. ckprist 0 1 0 0 ] 5 ~. ckprist 0 1 0 0 ] 4 5 +'<"0 i. 3' -. ckprist 0 1 2 0 ] 5 +'<"0 i. 3' ([-.-.) ckprist 0 0 ] 5 |. ckprist 0 1 0 0 ] 5 |.!.a: ckprist 0 0 0 0 ] 5 '3' |. ckprist 0 1 0 0 ] 5