加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
8231441-2-AArch64-Initial-SVE-backend-support.patch 95.54 KB
一键复制 编辑 原始数据 按行查看 历史
kuen 提交于 2022-02-08 17:37 . I4SV7X: upgrade to jdk11.0.14-ga
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index b64919a62..64985e498 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -69,7 +69,7 @@ register %{
//
// r0-r7,r10-r26 volatile (caller save)
// r27-r32 system (no save, no allocate)
-// r8-r9 invisible to the allocator (so we can use them as scratch regs)
+// r8-r9 non-allocatable (so we can use them as scratch regs)
//
// as regards Java usage. we don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see comment
@@ -94,6 +94,10 @@ reg_def R6 ( SOC, SOC, Op_RegI, 6, r6->as_VMReg() );
reg_def R6_H ( SOC, SOC, Op_RegI, 6, r6->as_VMReg()->next() );
reg_def R7 ( SOC, SOC, Op_RegI, 7, r7->as_VMReg() );
reg_def R7_H ( SOC, SOC, Op_RegI, 7, r7->as_VMReg()->next() );
+reg_def R8 ( NS, SOC, Op_RegI, 8, r8->as_VMReg() ); // rscratch1, non-allocatable
+reg_def R8_H ( NS, SOC, Op_RegI, 8, r8->as_VMReg()->next() );
+reg_def R9 ( NS, SOC, Op_RegI, 9, r9->as_VMReg() ); // rscratch2, non-allocatable
+reg_def R9_H ( NS, SOC, Op_RegI, 9, r9->as_VMReg()->next() );
reg_def R10 ( SOC, SOC, Op_RegI, 10, r10->as_VMReg() );
reg_def R10_H ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11 ( SOC, SOC, Op_RegI, 11, r11->as_VMReg() );
@@ -140,7 +144,7 @@ reg_def R31 ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg() ); // sp
reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
// ----------------------------
-// Float/Double Registers
+// Float/Double/Vector Registers
// ----------------------------
// Double Registers
@@ -161,165 +165,317 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
// the platform ABI treats v8-v15 as callee save). float registers
// v16-v31 are SOC as per the platform spec
- reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() );
- reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() );
- reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) );
- reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) );
-
- reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() );
- reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() );
- reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) );
- reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) );
-
- reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() );
- reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() );
- reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) );
- reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) );
-
- reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() );
- reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() );
- reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) );
- reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) );
-
- reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() );
- reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() );
- reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) );
- reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) );
-
- reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() );
- reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() );
- reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) );
- reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) );
-
- reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() );
- reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() );
- reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) );
- reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) );
-
- reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() );
- reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() );
- reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) );
- reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) );
-
- reg_def V8 ( SOC, SOE, Op_RegF, 8, v8->as_VMReg() );
- reg_def V8_H ( SOC, SOE, Op_RegF, 8, v8->as_VMReg()->next() );
- reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) );
- reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) );
-
- reg_def V9 ( SOC, SOE, Op_RegF, 9, v9->as_VMReg() );
- reg_def V9_H ( SOC, SOE, Op_RegF, 9, v9->as_VMReg()->next() );
- reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) );
- reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) );
-
- reg_def V10 ( SOC, SOE, Op_RegF, 10, v10->as_VMReg() );
- reg_def V10_H( SOC, SOE, Op_RegF, 10, v10->as_VMReg()->next() );
- reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
- reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
-
- reg_def V11 ( SOC, SOE, Op_RegF, 11, v11->as_VMReg() );
- reg_def V11_H( SOC, SOE, Op_RegF, 11, v11->as_VMReg()->next() );
- reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
- reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
-
- reg_def V12 ( SOC, SOE, Op_RegF, 12, v12->as_VMReg() );
- reg_def V12_H( SOC, SOE, Op_RegF, 12, v12->as_VMReg()->next() );
- reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
- reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
-
- reg_def V13 ( SOC, SOE, Op_RegF, 13, v13->as_VMReg() );
- reg_def V13_H( SOC, SOE, Op_RegF, 13, v13->as_VMReg()->next() );
- reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
- reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
-
- reg_def V14 ( SOC, SOE, Op_RegF, 14, v14->as_VMReg() );
- reg_def V14_H( SOC, SOE, Op_RegF, 14, v14->as_VMReg()->next() );
- reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
- reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
-
- reg_def V15 ( SOC, SOE, Op_RegF, 15, v15->as_VMReg() );
- reg_def V15_H( SOC, SOE, Op_RegF, 15, v15->as_VMReg()->next() );
- reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
- reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
-
- reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() );
- reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
- reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
- reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
-
- reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() );
- reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
- reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
- reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
-
- reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() );
- reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
- reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
- reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
-
- reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() );
- reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
- reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
- reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
-
- reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() );
- reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
- reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
- reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
-
- reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() );
- reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
- reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
- reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
-
- reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() );
- reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
- reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
- reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
-
- reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() );
- reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
- reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
- reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
-
- reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() );
- reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
- reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
- reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
-
- reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() );
- reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
- reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
- reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
-
- reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() );
- reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
- reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
- reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
-
- reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() );
- reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
- reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
- reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
-
- reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() );
- reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
- reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
- reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
-
- reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() );
- reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
- reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
- reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
-
- reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() );
- reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
- reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
- reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
-
- reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() );
- reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
- reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
- reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
+// For SVE vector registers, we simply extend vector register size to 8
+// 'logical' slots. This is nominally 256 bits but it actually covers
+// all possible 'physical' SVE vector register lengths from 128 to 2048
+// bits. The 'physical' SVE vector register length is detected during
+// startup, so the register allocator is able to identify the correct
+// number of bytes needed for an SVE spill/unspill.
+// Note that a vector register with 4 slots denotes a 128-bit NEON
+// register, allowing it to be distinguished from the corresponding SVE
+// vector register when the SVE vector length is 128 bits.
+
+reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() );
+reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() );
+reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) );
+reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) );
+reg_def V0_L ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(4) );
+reg_def V0_M ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(5) );
+reg_def V0_N ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(6) );
+reg_def V0_O ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(7) );
+
+reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() );
+reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() );
+reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) );
+reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) );
+reg_def V1_L ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(4) );
+reg_def V1_M ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(5) );
+reg_def V1_N ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(6) );
+reg_def V1_O ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(7) );
+
+reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() );
+reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() );
+reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) );
+reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) );
+reg_def V2_L ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(4) );
+reg_def V2_M ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(5) );
+reg_def V2_N ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(6) );
+reg_def V2_O ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(7) );
+
+reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() );
+reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() );
+reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) );
+reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) );
+reg_def V3_L ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(4) );
+reg_def V3_M ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(5) );
+reg_def V3_N ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(6) );
+reg_def V3_O ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(7) );
+
+reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() );
+reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() );
+reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) );
+reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) );
+reg_def V4_L ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(4) );
+reg_def V4_M ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(5) );
+reg_def V4_N ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(6) );
+reg_def V4_O ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(7) );
+
+reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() );
+reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() );
+reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) );
+reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) );
+reg_def V5_L ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(4) );
+reg_def V5_M ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(5) );
+reg_def V5_N ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(6) );
+reg_def V5_O ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(7) );
+
+reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() );
+reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() );
+reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) );
+reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) );
+reg_def V6_L ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(4) );
+reg_def V6_M ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(5) );
+reg_def V6_N ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(6) );
+reg_def V6_O ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(7) );
+
+reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() );
+reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() );
+reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) );
+reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) );
+reg_def V7_L ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(4) );
+reg_def V7_M ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(5) );
+reg_def V7_N ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(6) );
+reg_def V7_O ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(7) );
+
+reg_def V8 ( SOC, SOE, Op_RegF, 8, v8->as_VMReg() );
+reg_def V8_H ( SOC, SOE, Op_RegF, 8, v8->as_VMReg()->next() );
+reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) );
+reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) );
+reg_def V8_L ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(4) );
+reg_def V8_M ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(5) );
+reg_def V8_N ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(6) );
+reg_def V8_O ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(7) );
+
+reg_def V9 ( SOC, SOE, Op_RegF, 9, v9->as_VMReg() );
+reg_def V9_H ( SOC, SOE, Op_RegF, 9, v9->as_VMReg()->next() );
+reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) );
+reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) );
+reg_def V9_L ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(4) );
+reg_def V9_M ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(5) );
+reg_def V9_N ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(6) );
+reg_def V9_O ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(7) );
+
+reg_def V10 ( SOC, SOE, Op_RegF, 10, v10->as_VMReg() );
+reg_def V10_H ( SOC, SOE, Op_RegF, 10, v10->as_VMReg()->next() );
+reg_def V10_J ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2) );
+reg_def V10_K ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3) );
+reg_def V10_L ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(4) );
+reg_def V10_M ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(5) );
+reg_def V10_N ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(6) );
+reg_def V10_O ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(7) );
+
+reg_def V11 ( SOC, SOE, Op_RegF, 11, v11->as_VMReg() );
+reg_def V11_H ( SOC, SOE, Op_RegF, 11, v11->as_VMReg()->next() );
+reg_def V11_J ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2) );
+reg_def V11_K ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3) );
+reg_def V11_L ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(4) );
+reg_def V11_M ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(5) );
+reg_def V11_N ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(6) );
+reg_def V11_O ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(7) );
+
+reg_def V12 ( SOC, SOE, Op_RegF, 12, v12->as_VMReg() );
+reg_def V12_H ( SOC, SOE, Op_RegF, 12, v12->as_VMReg()->next() );
+reg_def V12_J ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2) );
+reg_def V12_K ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3) );
+reg_def V12_L ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(4) );
+reg_def V12_M ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(5) );
+reg_def V12_N ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(6) );
+reg_def V12_O ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(7) );
+
+reg_def V13 ( SOC, SOE, Op_RegF, 13, v13->as_VMReg() );
+reg_def V13_H ( SOC, SOE, Op_RegF, 13, v13->as_VMReg()->next() );
+reg_def V13_J ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2) );
+reg_def V13_K ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3) );
+reg_def V13_L ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(4) );
+reg_def V13_M ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(5) );
+reg_def V13_N ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(6) );
+reg_def V13_O ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(7) );
+
+reg_def V14 ( SOC, SOE, Op_RegF, 14, v14->as_VMReg() );
+reg_def V14_H ( SOC, SOE, Op_RegF, 14, v14->as_VMReg()->next() );
+reg_def V14_J ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2) );
+reg_def V14_K ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3) );
+reg_def V14_L ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(4) );
+reg_def V14_M ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(5) );
+reg_def V14_N ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(6) );
+reg_def V14_O ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(7) );
+
+reg_def V15 ( SOC, SOE, Op_RegF, 15, v15->as_VMReg() );
+reg_def V15_H ( SOC, SOE, Op_RegF, 15, v15->as_VMReg()->next() );
+reg_def V15_J ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2) );
+reg_def V15_K ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3) );
+reg_def V15_L ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(4) );
+reg_def V15_M ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(5) );
+reg_def V15_N ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(6) );
+reg_def V15_O ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(7) );
+
+reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() );
+reg_def V16_H ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
+reg_def V16_J ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2) );
+reg_def V16_K ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3) );
+reg_def V16_L ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(4) );
+reg_def V16_M ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(5) );
+reg_def V16_N ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(6) );
+reg_def V16_O ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(7) );
+
+reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() );
+reg_def V17_H ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
+reg_def V17_J ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2) );
+reg_def V17_K ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3) );
+reg_def V17_L ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(4) );
+reg_def V17_M ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(5) );
+reg_def V17_N ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(6) );
+reg_def V17_O ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(7) );
+
+reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() );
+reg_def V18_H ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
+reg_def V18_J ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2) );
+reg_def V18_K ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3) );
+reg_def V18_L ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(4) );
+reg_def V18_M ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(5) );
+reg_def V18_N ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(6) );
+reg_def V18_O ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(7) );
+
+reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() );
+reg_def V19_H ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
+reg_def V19_J ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2) );
+reg_def V19_K ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3) );
+reg_def V19_L ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(4) );
+reg_def V19_M ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(5) );
+reg_def V19_N ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(6) );
+reg_def V19_O ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(7) );
+
+reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() );
+reg_def V20_H ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
+reg_def V20_J ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2) );
+reg_def V20_K ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3) );
+reg_def V20_L ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(4) );
+reg_def V20_M ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(5) );
+reg_def V20_N ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(6) );
+reg_def V20_O ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(7) );
+
+reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() );
+reg_def V21_H ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
+reg_def V21_J ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2) );
+reg_def V21_K ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3) );
+reg_def V21_L ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(4) );
+reg_def V21_M ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(5) );
+reg_def V21_N ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(6) );
+reg_def V21_O ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(7) );
+
+reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() );
+reg_def V22_H ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
+reg_def V22_J ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2) );
+reg_def V22_K ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3) );
+reg_def V22_L ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(4) );
+reg_def V22_M ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(5) );
+reg_def V22_N ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(6) );
+reg_def V22_O ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(7) );
+
+reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() );
+reg_def V23_H ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
+reg_def V23_J ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2) );
+reg_def V23_K ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3) );
+reg_def V23_L ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(4) );
+reg_def V23_M ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(5) );
+reg_def V23_N ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(6) );
+reg_def V23_O ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(7) );
+
+reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() );
+reg_def V24_H ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
+reg_def V24_J ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2) );
+reg_def V24_K ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3) );
+reg_def V24_L ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(4) );
+reg_def V24_M ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(5) );
+reg_def V24_N ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(6) );
+reg_def V24_O ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(7) );
+
+reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() );
+reg_def V25_H ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
+reg_def V25_J ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2) );
+reg_def V25_K ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3) );
+reg_def V25_L ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(4) );
+reg_def V25_M ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(5) );
+reg_def V25_N ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(6) );
+reg_def V25_O ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(7) );
+
+reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() );
+reg_def V26_H ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
+reg_def V26_J ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2) );
+reg_def V26_K ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3) );
+reg_def V26_L ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(4) );
+reg_def V26_M ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(5) );
+reg_def V26_N ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(6) );
+reg_def V26_O ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(7) );
+
+reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() );
+reg_def V27_H ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
+reg_def V27_J ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2) );
+reg_def V27_K ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3) );
+reg_def V27_L ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(4) );
+reg_def V27_M ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(5) );
+reg_def V27_N ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(6) );
+reg_def V27_O ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(7) );
+
+reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() );
+reg_def V28_H ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
+reg_def V28_J ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2) );
+reg_def V28_K ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3) );
+reg_def V28_L ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(4) );
+reg_def V28_M ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(5) );
+reg_def V28_N ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(6) );
+reg_def V28_O ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(7) );
+
+reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() );
+reg_def V29_H ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
+reg_def V29_J ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2) );
+reg_def V29_K ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3) );
+reg_def V29_L ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(4) );
+reg_def V29_M ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(5) );
+reg_def V29_N ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(6) );
+reg_def V29_O ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(7) );
+
+reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() );
+reg_def V30_H ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
+reg_def V30_J ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2) );
+reg_def V30_K ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3) );
+reg_def V30_L ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(4) );
+reg_def V30_M ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(5) );
+reg_def V30_N ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(6) );
+reg_def V30_O ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(7) );
+
+reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() );
+reg_def V31_H ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
+reg_def V31_J ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2) );
+reg_def V31_K ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3) );
+reg_def V31_L ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(4) );
+reg_def V31_M ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(5) );
+reg_def V31_N ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(6) );
+reg_def V31_O ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(7) );
+
+
+// ----------------------------
+// SVE Predicate Registers
+// ----------------------------
+reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg());
+reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg());
+reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg());
+reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg());
+reg_def P4 (SOC, SOC, Op_RegVMask, 4, p4->as_VMReg());
+reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg());
+reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg());
+reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg());
+
// ----------------------------
// Special Registers
@@ -381,50 +536,64 @@ alloc_class chunk0(
R29, R29_H, // fp
R30, R30_H, // lr
R31, R31_H, // sp
+ R8, R8_H, // rscratch1
+ R9, R9_H, // rscratch2
);
alloc_class chunk1(
// no save
- V16, V16_H, V16_J, V16_K,
- V17, V17_H, V17_J, V17_K,
- V18, V18_H, V18_J, V18_K,
- V19, V19_H, V19_J, V19_K,
- V20, V20_H, V20_J, V20_K,
- V21, V21_H, V21_J, V21_K,
- V22, V22_H, V22_J, V22_K,
- V23, V23_H, V23_J, V23_K,
- V24, V24_H, V24_J, V24_K,
- V25, V25_H, V25_J, V25_K,
- V26, V26_H, V26_J, V26_K,
- V27, V27_H, V27_J, V27_K,
- V28, V28_H, V28_J, V28_K,
- V29, V29_H, V29_J, V29_K,
- V30, V30_H, V30_J, V30_K,
- V31, V31_H, V31_J, V31_K,
+ V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O,
+ V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O,
+ V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O,
+ V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O,
+ V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O,
+ V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O,
+ V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O,
+ V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O,
+ V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O,
+ V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O,
+ V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O,
+ V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O,
+ V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O,
+ V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O,
+ V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O,
+ V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O,
// arg registers
- V0, V0_H, V0_J, V0_K,
- V1, V1_H, V1_J, V1_K,
- V2, V2_H, V2_J, V2_K,
- V3, V3_H, V3_J, V3_K,
- V4, V4_H, V4_J, V4_K,
- V5, V5_H, V5_J, V5_K,
- V6, V6_H, V6_J, V6_K,
- V7, V7_H, V7_J, V7_K,
+ V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O,
+ V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O,
+ V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O,
+ V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O,
+ V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O,
+ V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O,
+ V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O,
+ V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O,
// non-volatiles
- V8, V8_H, V8_J, V8_K,
- V9, V9_H, V9_J, V9_K,
- V10, V10_H, V10_J, V10_K,
- V11, V11_H, V11_J, V11_K,
- V12, V12_H, V12_J, V12_K,
- V13, V13_H, V13_J, V13_K,
- V14, V14_H, V14_J, V14_K,
- V15, V15_H, V15_J, V15_K,
+ V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O,
+ V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O,
+ V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O,
+ V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O,
+ V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O,
+ V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O,
+ V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O,
+ V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O,
+);
+
+alloc_class chunk2 (
+ P0,
+ P1,
+ P2,
+ P3,
+ P4,
+ P5,
+ P6,
+ P7,
+ // Only use P0~P7 here for performance
);
-alloc_class chunk2(RFLAGS);
+alloc_class chunk3(RFLAGS);
//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
@@ -865,6 +1034,42 @@ reg_class double_reg(
V31, V31_H
);
+// Class for all SVE vector registers.
+reg_class vectora_reg (
+ V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O,
+ V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O,
+ V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O,
+ V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O,
+ V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O,
+ V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O,
+ V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O,
+ V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O,
+ V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O,
+ V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O,
+ V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O,
+ V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O,
+ V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O,
+ V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O,
+ V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O,
+ V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O,
+ V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O,
+ V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O,
+ V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O,
+ V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O,
+ V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O,
+ V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O,
+ V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O,
+ V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O,
+ V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O,
+ V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O,
+ V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O,
+ V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O,
+ V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O,
+ V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O,
+ V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O,
+ V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O,
+);
+
// Class for all 64bit vector registers
reg_class vectord_reg(
V0, V0_H,
@@ -1097,6 +1302,31 @@ reg_class v31_reg(
V31, V31_H
);
+// Class for all SVE predicate registers.
+reg_class pr_reg (
+ P0,
+ P1,
+ P2,
+ P3,
+ P4,
+ P5,
+ P6,
+ // P7, non-allocatable, preserved with all elements preset to TRUE.
+);
+
+// Class for SVE governing predicate registers, which are used
+// to determine the active elements of a predicated instruction.
+reg_class gov_pr (
+ P0,
+ P1,
+ P2,
+ P3,
+ P4,
+ P5,
+ P6,
+ // P7, non-allocatable, preserved with all elements preset to TRUE.
+);
+
// Singleton class for condition codes
reg_class int_flags(RFLAGS);
@@ -1862,7 +2092,7 @@ int MachEpilogNode::safepoint_offset() const {
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
-enum RC { rc_bad, rc_int, rc_float, rc_stack };
+enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
static enum RC rc_class(OptoReg::Name reg) {
@@ -1870,20 +2100,25 @@ static enum RC rc_class(OptoReg::Name reg) {
return rc_bad;
}
- // we have 30 int registers * 2 halves
- // (rscratch1 and rscratch2 are omitted)
- int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2);
+ // we have 32 int registers * 2 halves
+ int slots_of_int_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers;
if (reg < slots_of_int_registers) {
return rc_int;
}
- // we have 32 float register * 4 halves
- if (reg < slots_of_int_registers + FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) {
+ // we have 32 float registers * 8 slots each
+ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers;
+ if (reg < slots_of_int_registers + slots_of_float_registers) {
return rc_float;
}
- // Between float regs & stack is the flags regs.
+ int slots_of_predicate_registers = PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers;
+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_predicate_registers) {
+ return rc_predicate;
+ }
+
+ // Between predicate regs & stack is the flags.
assert(OptoReg::is_stack(reg), "blow up if spilling flags");
return rc_stack;
@@ -1941,12 +2176,12 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
as_FloatRegister(Matcher::_regEncode[src_lo]));
} else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
__ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
- ireg == Op_VecD ? __ D : __ Q,
- ra_->reg2offset(dst_lo));
+ ireg == Op_VecD ? __ D : __ Q,
+ ra_->reg2offset(dst_lo));
} else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
__ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
- ireg == Op_VecD ? __ D : __ Q,
- ra_->reg2offset(src_lo));
+ ireg == Op_VecD ? __ D : __ Q,
+ ra_->reg2offset(src_lo));
} else {
ShouldNotReachHere();
}
@@ -2031,9 +2266,24 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
st->print("%s", Matcher::regName[dst_lo]);
}
if (bottom_type()->isa_vect() != NULL) {
- st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
+ int vsize = 0;
+ switch (ideal_reg()) {
+ case Op_VecD:
+ vsize = 64;
+ break;
+ case Op_VecX:
+ vsize = 128;
+ break;
+ case Op_VecA:
+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8;
+ break;
+ default:
+ assert(false, "bad register type for spill");
+ ShouldNotReachHere();
+ }
+ st->print("\t# vector spill size = %d", vsize);
} else {
- st->print("\t# spill size = %d", is64 ? 64:32);
+ st->print("\t# spill size = %d", is64 ? 64 : 32);
}
}
@@ -2204,7 +2454,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
}
const bool Matcher::has_predicated_vectors(void) {
- return false;
+ return UseSVE > 0;
}
const int Matcher::float_pressure(int default_pressure_threshold) {
@@ -2240,7 +2490,8 @@ const bool Matcher::convL2FSupported(void) {
// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
- int size = MIN2(16,(int)MaxVectorSize);
+ // The MaxVectorSize should have been set by detecting SVE max vector register size.
+ int size = MIN2((UseSVE > 0) ? 256 : 16, (int)MaxVectorSize);
// Minimum 2 values in vector
if (size < 2*type2aelembytes(bt)) size = 0;
// But never < 4
@@ -2253,14 +2504,32 @@ const int Matcher::max_vector_size(const BasicType bt) {
return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
-// For the moment limit the vector size to 8 bytes
+ int max_size = max_vector_size(bt);
+ if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
+ // Vector lengths shorter than the SVE vector register size are not supported yet.
+ return max_size;
+ } else {
+ // For the moment limit the vector size to 8 bytes with NEON.
int size = 8 / type2aelembytes(bt);
if (size < 2) size = 2;
return size;
+ }
+}
+
+const bool Matcher::supports_scalable_vector() {
+ return UseSVE > 0;
+}
+
+// Actual max scalable vector register length.
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+ return Matcher::max_vector_size(bt);
}
// Vector ideal reg.
const uint Matcher::vector_ideal_reg(int len) {
+ if (UseSVE > 0 && 16 <= len && len <= 256) {
+ return Op_VecA;
+ }
switch(len) {
case 8: return Op_VecD;
case 16: return Op_VecX;
@@ -2270,6 +2539,9 @@ const uint Matcher::vector_ideal_reg(int len) {
}
const uint Matcher::vector_shift_count_ideal_reg(int size) {
+ if (UseSVE > 0 && 16 <= size && size <= 256) {
+ return Op_VecA;
+ }
switch(size) {
case 8: return Op_VecD;
case 16: return Op_VecX;
@@ -4851,6 +5123,18 @@ operand vRegD()
interface(REG_INTER);
%}
+// Generic vector class. This will be used for
+// all vector operands, including NEON and SVE,
+// but currently only used for SVE VecA.
+operand vReg()
+%{
+ constraint(ALLOC_IN_RC(vectora_reg));
+ match(VecA);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
operand vecD()
%{
constraint(ALLOC_IN_RC(vectord_reg));
@@ -5159,6 +5443,15 @@ operand vRegD_V31()
interface(REG_INTER);
%}
+operand pRegGov()
+%{
+ constraint(ALLOC_IN_RC(gov_pr));
+ match(RegVMask);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// Flags register, used as output of signed compare instructions
// note that on AArch64 we also use this register as the output for
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
index 228b82660..6f4e75ff4 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -273,6 +273,14 @@ public:
f(r->encoding_nocheck(), lsb + 4, lsb);
}
+ void prf(PRegister r, int lsb) {
+ f(r->encoding_nocheck(), lsb + 3, lsb);
+ }
+
+ void pgrf(PRegister r, int lsb) {
+ f(r->encoding_nocheck(), lsb + 2, lsb);
+ }
+
unsigned get(int msb = 31, int lsb = 0) {
int nbits = msb - lsb + 1;
unsigned mask = checked_cast<unsigned>(right_n_bits(nbits)) << lsb;
@@ -669,6 +677,12 @@ public:
void rf(FloatRegister reg, int lsb) {
current->rf(reg, lsb);
}
+ void prf(PRegister reg, int lsb) {
+ current->prf(reg, lsb);
+ }
+ void pgrf(PRegister reg, int lsb) {
+ current->pgrf(reg, lsb);
+ }
void fixed(unsigned value, unsigned mask) {
current->fixed(value, mask);
}
diff --git a/src/hotspot/cpu/aarch64/register_aarch64.cpp b/src/hotspot/cpu/aarch64/register_aarch64.cpp
index 36cbe3fee..3db8e8337 100644
--- a/src/hotspot/cpu/aarch64/register_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/register_aarch64.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,9 @@ const int ConcreteRegisterImpl::max_fpr
= ConcreteRegisterImpl::max_gpr +
FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register;
+// End of the predicate-register slot range (predicate VMRegs start at max_fpr).
+// Scaled by slots per register so it stays in step with max_fpr above and with
+// ConcreteRegisterImpl::number_of_registers.
+const int ConcreteRegisterImpl::max_pr
+ = ConcreteRegisterImpl::max_fpr +
+ PRegisterImpl::number_of_registers * PRegisterImpl::max_slots_per_register;
+
const char* RegisterImpl::name() const {
const char* names[number_of_registers] = {
"c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7",
@@ -54,3 +57,10 @@ const char* FloatRegisterImpl::name() const {
};
return is_valid() ? names[encoding()] : "noreg";
}
+
+const char* PRegisterImpl::name() const {
+ const char* names[number_of_registers] = {
+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7"
+ };
+ return is_valid() ? names[encoding()] : "noreg";
+}
diff --git a/src/hotspot/cpu/aarch64/register_aarch64.hpp b/src/hotspot/cpu/aarch64/register_aarch64.hpp
index 20f549188..e7a9cee92 100644
--- a/src/hotspot/cpu/aarch64/register_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp
@@ -140,9 +140,10 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
public:
enum {
number_of_registers = 32,
- max_slots_per_register = 4,
+ max_slots_per_register = 8,
save_slots_per_register = 2,
- extra_save_slots_per_register = max_slots_per_register - save_slots_per_register
+ slots_per_neon_register = 4,
+ extra_save_slots_per_neon_register = slots_per_neon_register - save_slots_per_register
};
// construction
@@ -198,6 +199,79 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, v29 , (29));
CONSTANT_REGISTER_DECLARATION(FloatRegister, v30 , (30));
CONSTANT_REGISTER_DECLARATION(FloatRegister, v31 , (31));
+// SVE vector registers, shared with the SIMD&FP v0-v31. Vn maps to Zn[127:0].
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z0 , ( 0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z1 , ( 1));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z2 , ( 2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z3 , ( 3));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z4 , ( 4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z5 , ( 5));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z6 , ( 6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z7 , ( 7));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z8 , ( 8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z9 , ( 9));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z10 , (10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z11 , (11));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z12 , (12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z13 , (13));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z14 , (14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z15 , (15));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z16 , (16));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z17 , (17));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z18 , (18));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z19 , (19));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z20 , (20));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z21 , (21));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z22 , (22));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z23 , (23));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z24 , (24));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z25 , (25));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z26 , (26));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z27 , (27));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z28 , (28));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z29 , (29));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z30 , (30));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z31 , (31));
+
+class PRegisterImpl;
+typedef PRegisterImpl* PRegister;
+inline PRegister as_PRegister(int encoding) {
+ return (PRegister)(intptr_t)encoding;
+}
+
+// SVE predicate register. The register encoding is carried in the PRegister pointer value itself (see encoding()).
+class PRegisterImpl: public AbstractRegisterImpl {
+ public:
+ enum {
+ number_of_registers = 8,
+ max_slots_per_register = 1
+ };
+
+ // construction: as_PRegister() turns an integer encoding into a PRegister
+ inline friend PRegister as_PRegister(int encoding);
+
+ VMReg as_VMReg();
+
+ // derived registers: successor() is encoding() + 1; the result itself is not range-checked
+ PRegister successor() const { return as_PRegister(encoding() + 1); }
+
+ // accessors: encoding() asserts validity, encoding_nocheck() returns the raw value
+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+ int encoding_nocheck() const { return (intptr_t)this; }
+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
+ const char* name() const;
+};
+
+// The predicate registers of SVE.
+CONSTANT_REGISTER_DECLARATION(PRegister, p0, ( 0));
+CONSTANT_REGISTER_DECLARATION(PRegister, p1, ( 1));
+CONSTANT_REGISTER_DECLARATION(PRegister, p2, ( 2));
+CONSTANT_REGISTER_DECLARATION(PRegister, p3, ( 3));
+CONSTANT_REGISTER_DECLARATION(PRegister, p4, ( 4));
+CONSTANT_REGISTER_DECLARATION(PRegister, p5, ( 5));
+CONSTANT_REGISTER_DECLARATION(PRegister, p6, ( 6));
+CONSTANT_REGISTER_DECLARATION(PRegister, p7, ( 7));
+
// Need to know the total number of registers of all sorts for SharedInfo.
// Define a class that exports it.
class ConcreteRegisterImpl : public AbstractRegisterImpl {
@@ -210,12 +284,14 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers +
+ PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers +
1) // flags
};
// added to make it compile
static const int max_gpr;
static const int max_fpr;
+ static const int max_pr;
};
class RegSetIterator;
diff --git a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
index c18109087..15131ed32 100644
--- a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -154,3 +154,45 @@ REGISTER_DEFINITION(Register, rthread);
REGISTER_DEFINITION(Register, rheapbase);
REGISTER_DEFINITION(Register, r31_sp);
+
+REGISTER_DEFINITION(FloatRegister, z0);
+REGISTER_DEFINITION(FloatRegister, z1);
+REGISTER_DEFINITION(FloatRegister, z2);
+REGISTER_DEFINITION(FloatRegister, z3);
+REGISTER_DEFINITION(FloatRegister, z4);
+REGISTER_DEFINITION(FloatRegister, z5);
+REGISTER_DEFINITION(FloatRegister, z6);
+REGISTER_DEFINITION(FloatRegister, z7);
+REGISTER_DEFINITION(FloatRegister, z8);
+REGISTER_DEFINITION(FloatRegister, z9);
+REGISTER_DEFINITION(FloatRegister, z10);
+REGISTER_DEFINITION(FloatRegister, z11);
+REGISTER_DEFINITION(FloatRegister, z12);
+REGISTER_DEFINITION(FloatRegister, z13);
+REGISTER_DEFINITION(FloatRegister, z14);
+REGISTER_DEFINITION(FloatRegister, z15);
+REGISTER_DEFINITION(FloatRegister, z16);
+REGISTER_DEFINITION(FloatRegister, z17);
+REGISTER_DEFINITION(FloatRegister, z18);
+REGISTER_DEFINITION(FloatRegister, z19);
+REGISTER_DEFINITION(FloatRegister, z20);
+REGISTER_DEFINITION(FloatRegister, z21);
+REGISTER_DEFINITION(FloatRegister, z22);
+REGISTER_DEFINITION(FloatRegister, z23);
+REGISTER_DEFINITION(FloatRegister, z24);
+REGISTER_DEFINITION(FloatRegister, z25);
+REGISTER_DEFINITION(FloatRegister, z26);
+REGISTER_DEFINITION(FloatRegister, z27);
+REGISTER_DEFINITION(FloatRegister, z28);
+REGISTER_DEFINITION(FloatRegister, z29);
+REGISTER_DEFINITION(FloatRegister, z30);
+REGISTER_DEFINITION(FloatRegister, z31);
+
+REGISTER_DEFINITION(PRegister, p0);
+REGISTER_DEFINITION(PRegister, p1);
+REGISTER_DEFINITION(PRegister, p2);
+REGISTER_DEFINITION(PRegister, p3);
+REGISTER_DEFINITION(PRegister, p4);
+REGISTER_DEFINITION(PRegister, p5);
+REGISTER_DEFINITION(PRegister, p6);
+REGISTER_DEFINITION(PRegister, p7);
diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
index dbad48582..3bf7284a7 100644
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -111,11 +111,28 @@ class RegisterSaver {
};
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
+ bool use_sve = false;
+ int sve_vector_size_in_bytes = 0;
+ int sve_vector_size_in_slots = 0;
+
+#ifdef COMPILER2
+ use_sve = Matcher::supports_scalable_vector();
+ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+ sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
+#endif
+
#if COMPILER2_OR_JVMCI
if (save_vectors) {
+ int vect_words = 0;
+ int extra_save_slots_per_register = 0;
// Save upper half of vector registers
- int vect_words = FloatRegisterImpl::number_of_registers * FloatRegisterImpl::extra_save_slots_per_register /
- VMRegImpl::slots_per_word;
+ if (use_sve) {
+ extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register;
+ } else {
+ extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register;
+ }
+ vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register /
+ VMRegImpl::slots_per_word;
additional_frame_words += vect_words;
}
#else
@@ -158,8 +175,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
FloatRegister r = as_FloatRegister(i);
- int sp_offset = save_vectors ? (FloatRegisterImpl::max_slots_per_register * i) :
- (FloatRegisterImpl::save_slots_per_register * i);
+ int sp_offset = 0;
+ if (save_vectors) {
+ sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
+ (FloatRegisterImpl::slots_per_neon_register * i);
+ } else {
+ sp_offset = FloatRegisterImpl::save_slots_per_register * i;
+ }
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
r->as_VMReg());
}
diff --git a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp
index c5d4383b4..dde7a7a91 100644
--- a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp
+++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -36,4 +36,8 @@ inline VMReg FloatRegisterImpl::as_VMReg() {
ConcreteRegisterImpl::max_gpr);
}
+inline VMReg PRegisterImpl::as_VMReg() {
+ return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_fpr);
+}
+
#endif // CPU_AARCH64_VM_VMREG_AARCH64_INLINE_HPP
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index 4a32af54e..0ef4d7f3e 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) {
return MaxVectorSize;
}
+const bool Matcher::supports_scalable_vector() {
+ return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+ return -1;
+}
+
// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
assert(MaxVectorSize >= size, "");
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index 36cbada53..7ee16a975 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -2310,6 +2310,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
return max_vector_size(bt); // Same as max.
}
+const bool Matcher::supports_scalable_vector() {
+ return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+ return -1;
+}
+
// PPC implementation uses VSX load/store instructions (if
// SuperwordUseVSX) which support 4 byte but not arbitrary alignment
const bool Matcher::misaligned_vectors_ok() {
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index 96c231b0a..ea09aaafc 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -1573,6 +1573,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
return max_vector_size(bt); // Same as max.
}
+const bool Matcher::supports_scalable_vector() {
+ return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+ return -1;
+}
+
const uint Matcher::vector_shift_count_ideal_reg(int size) {
fatal("vector shift is not supported");
return Node::NotAMachineReg;
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index abdd7483d..76dd6addd 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -1485,6 +1485,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
return MIN2(size,max_size);
}
+const bool Matcher::supports_scalable_vector() {
+ return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+ return -1;
+}
+
// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
assert(MaxVectorSize >= size, "");
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
index 8904bba97..e09cdd061 100644
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@@ -2968,7 +2968,7 @@ frame
RAX_H_num // Op_RegL
};
// Excluded flags and vector registers.
- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
+ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
%}
%}
diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp
index ba61aa4c0..9e41b2dc6 100644
--- a/src/hotspot/share/adlc/archDesc.cpp
+++ b/src/hotspot/share/adlc/archDesc.cpp
@@ -1,5 +1,5 @@
//
-// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
// Match Vector types.
if (strncmp(idealOp, "Vec",3)==0) {
switch(last_char) {
+ case 'A': return "TypeVect::VECTA";
case 'S': return "TypeVect::VECTS";
case 'D': return "TypeVect::VECTD";
case 'X': return "TypeVect::VECTX";
@@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
}
}
+ if (strncmp(idealOp, "RegVMask", 8) == 0) {
+ return "Type::BOTTOM";
+ }
+
// !!!!!
switch(last_char) {
case 'I': return "TypeInt::INT";
diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp
index c7b855a7e..a37866824 100644
--- a/src/hotspot/share/adlc/formssel.cpp
+++ b/src/hotspot/share/adlc/formssel.cpp
@@ -3963,6 +3963,8 @@ bool MatchRule::is_base_register(FormDict &globals) const {
strcmp(opType,"RegL")==0 ||
strcmp(opType,"RegF")==0 ||
strcmp(opType,"RegD")==0 ||
+ strcmp(opType,"RegVMask")==0 ||
+ strcmp(opType,"VecA")==0 ||
strcmp(opType,"VecS")==0 ||
strcmp(opType,"VecD")==0 ||
strcmp(opType,"VecX")==0 ||
diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp
index 914dc43f6..710af9de8 100644
--- a/src/hotspot/share/opto/chaitin.cpp
+++ b/src/hotspot/share/opto/chaitin.cpp
@@ -77,6 +77,7 @@ void LRG::dump() const {
if( _is_oop ) tty->print("Oop ");
if( _is_float ) tty->print("Float ");
if( _is_vector ) tty->print("Vector ");
+ if( _is_scalable ) tty->print("Scalable ");
if( _was_spilled1 ) tty->print("Spilled ");
if( _was_spilled2 ) tty->print("Spilled2 ");
if( _direct_conflict ) tty->print("Direct_conflict ");
@@ -646,7 +647,15 @@ void PhaseChaitin::Register_Allocate() {
// Live ranges record the highest register in their mask.
// We want the low register for the AD file writer's convenience.
OptoReg::Name hi = lrg.reg(); // Get hi register
- OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
+ int num_regs = lrg.num_regs();
+ if (lrg.is_scalable() && OptoReg::is_stack(hi)) {
+ // For scalable vector registers, when they are allocated in physical
+ // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable
+ // vector. If they are allocated on stack, we need to get the actual
+ // num_regs, which reflects the physical length of scalable registers.
+ num_regs = lrg.scalable_reg_slots();
+ }
+ OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo
// We have to use pair [lo,lo+1] even for wide vectors because
// the rest of code generation works only with pairs. It is safe
// since for registers encoding only 'lo' is used.
@@ -801,8 +810,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// Check for vector live range (only if vector register is used).
// On SPARC vector uses RegD which could be misaligned so it is not
// processes as vector in RA.
- if (RegMask::is_vector(ireg))
+ if (RegMask::is_vector(ireg)) {
lrg._is_vector = 1;
+ if (ireg == Op_VecA) {
+ assert(Matcher::supports_scalable_vector(), "scalable vector should be supported");
+ lrg._is_scalable = 1;
+ // For scalable vector, when it is allocated in physical register,
+ // num_regs is RegMask::SlotsPerVecA for reg mask,
+ // which may not be the actual physical register size.
+ // If it is allocated in stack, we need to get the actual
+ // physical length of scalable vector register.
+ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
+ }
+ }
assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
"vector must be in vector registers");
@@ -912,6 +932,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
lrg.set_reg_pressure(1);
#endif
break;
+ case Op_VecA:
+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
+ assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity");
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned");
+ lrg.set_num_regs(RegMask::SlotsPerVecA);
+ lrg.set_reg_pressure(1);
+ break;
case Op_VecS:
assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
@@ -1358,6 +1385,46 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
return false;
}
+static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { // 'mask' is passed by value: the search below edits this private copy
+ int num_regs = lrg.num_regs();
+ OptoReg::Name assigned = mask.find_first_set(lrg, num_regs);
+ // Live ranges that are not scalable skip the block below and return 'assigned' unchanged.
+ if (lrg.is_scalable()) {
+ // A physical register was found; it is accepted only while in chunk 0.
+ if (chunk == 0 && OptoReg::is_reg(assigned)) {
+ return assigned;
+ }
+
+ // Otherwise look for available stack slots for the scalable register.
+ if (lrg._is_vector) {
+ num_regs = lrg.scalable_reg_slots();
+ // If the actual scalable vector register is exactly SlotsPerVecA * 32 bits, the result of the first search is used as-is.
+ if (num_regs == RegMask::SlotsPerVecA) {
+ return assigned;
+ }
+
+ // The mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but that
+ // does not work for the scalable size. We have to find scalable_reg_slots() adjacent bits
+ // instead of SlotsPerVecA bits.
+ assigned = mask.find_first_set(lrg, num_regs); // candidate: highest reg of the first set found
+ while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) {
+ // Verify that all scalable_reg_slots() bits ending at the found reg are set.
+ if (mask.is_valid_reg(assigned, num_regs)) {
+ return assigned;
+ } else {
+ // Candidate rejected: shrink the mask a little more on each iteration.
+ mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg of the candidate
+ mask.clear_to_sets(RegMask::SlotsPerVecA); // Re-align to SlotsPerVecA bits
+ assigned = mask.find_first_set(lrg, num_regs);
+ }
+ }
+ return OptoReg::Bad; // will cause chunk change, and retry next chunk
+ }
+ }
+
+ return assigned;
+}
+
// Choose a color using the biasing heuristic
OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
@@ -1391,7 +1458,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
RegMask tempmask = lrg.mask();
tempmask.AND(lrgs(copy_lrg).mask());
tempmask.clear_to_sets(lrg.num_regs());
- OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
+ OptoReg::Name reg = find_first_set(lrg, tempmask, chunk);
if (OptoReg::is_valid(reg))
return reg;
}
@@ -1400,7 +1467,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
// If no bias info exists, just go with the register selection ordering
if (lrg._is_vector || lrg.num_regs() == 2) {
// Find an aligned set
- return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
+ return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk);
}
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
@@ -1455,7 +1522,6 @@ uint PhaseChaitin::Select( ) {
LRG *lrg = &lrgs(lidx);
_simplified = lrg->_next;
-
#ifndef PRODUCT
if (trace_spilling()) {
ttyLocker ttyl;
@@ -1539,7 +1605,6 @@ uint PhaseChaitin::Select( ) {
// Bump register mask up to next stack chunk
chunk += RegMask::CHUNK_SIZE;
lrg->Set_All();
-
goto retry_next_chunk;
}
@@ -1564,12 +1629,21 @@ uint PhaseChaitin::Select( ) {
int n_regs = lrg->num_regs();
assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
if (n_regs == 1 || !lrg->_fat_proj) {
- assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
+ if (Matcher::supports_scalable_vector()) {
+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity");
+ } else {
+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
+ }
lrg->Clear(); // Clear the mask
lrg->Insert(reg); // Set regmask to match selected reg
// For vectors and pairs, also insert the low bit of the pair
- for (int i = 1; i < n_regs; i++)
+ // We always choose the high bit, then mask the low bits by register size
+ if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack
+ n_regs = lrg->scalable_reg_slots();
+ }
+ for (int i = 1; i < n_regs; i++) {
lrg->Insert(OptoReg::add(reg,-i));
+ }
lrg->set_mask_size(n_regs);
} else { // Else fatproj
// mask must be equal to fatproj bits, by definition
diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp
index e5be5b966..5408a24ef 100644
--- a/src/hotspot/share/opto/chaitin.hpp
+++ b/src/hotspot/share/opto/chaitin.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -115,7 +115,9 @@ public:
_msize_valid=1;
if (_is_vector) {
assert(!_fat_proj, "sanity");
- _mask.verify_sets(_num_regs);
+ if (!(_is_scalable && OptoReg::is_stack(_reg))) {
+ _mask.verify_sets(_num_regs);
+ }
} else if (_num_regs == 2 && !_fat_proj) {
_mask.verify_pairs();
}
@@ -139,14 +141,37 @@ public:
void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) }
void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }
- // Number of registers this live range uses when it colors
private:
+ // Number of registers this live range uses when it colors
uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else
// except _num_regs is kill count for fat_proj
+
+ // For a scalable register, num_regs may not be the actual physical register size.
+ // When a scalable register is spilled, we need its actual physical length.
+ // One slot is 32 bits wide.
+ uint _scalable_reg_slots; // Actual scalable register length of slots.
+ // Meaningful only when _is_scalable is true.
public:
int num_regs() const { return _num_regs; }
void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; }
+  // Actual length of the scalable register in 32-bit slots, as recorded by
+  // set_scalable_reg_slots(). Meaningful only when _is_scalable is true.
+  // Const-qualified like num_regs(): it only reads the live range.
+  uint scalable_reg_slots() const { return _scalable_reg_slots; }
+ void set_scalable_reg_slots(uint slots) { // records the actual length of the scalable register, in slots
+ assert(_is_scalable, "scalable register"); // the live range must already be flagged scalable
+ assert(slots > 0, "slots of scalable register is not valid"); // a zero length is rejected
+ _scalable_reg_slots = slots;
+ }
+
+  // True if this live range is held in a scalable register, i.e. its
+  // _is_scalable flag is set. In ASSERT builds a scalable live range is
+  // additionally checked to be a vector with _num_regs == SlotsPerVecA.
+  // Const-qualified like num_regs(): the predicate only reads the live range,
+  // so it can also be used through const references.
+  bool is_scalable() const {
+#ifdef ASSERT
+    if (_is_scalable) {
+      // Should only be a vector for now, but it could also be a RegVMask in future.
+      assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg");
+    }
+#endif
+    return _is_scalable;
+  }
+
private:
// Number of physical registers this live range uses when it colors
// Architecture and register-set dependent
@@ -172,6 +197,8 @@ public:
uint _is_oop:1, // Live-range holds an oop
_is_float:1, // True if in float registers
_is_vector:1, // True if in vector registers
+ _is_scalable:1, // True if register size is scalable
+ // e.g. Arm SVE vector/predicate registers.
_was_spilled1:1, // True if prior spilling on def
_was_spilled2:1, // True if twice prior spilling on def
_is_bound:1, // live range starts life with no
diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp
index 4cc7580a8..4fb732161 100644
--- a/src/hotspot/share/opto/matcher.cpp
+++ b/src/hotspot/share/opto/matcher.cpp
@@ -84,6 +84,7 @@ Matcher::Matcher()
idealreg2spillmask [Op_RegF] = NULL;
idealreg2spillmask [Op_RegD] = NULL;
idealreg2spillmask [Op_RegP] = NULL;
+ idealreg2spillmask [Op_VecA] = NULL;
idealreg2spillmask [Op_VecS] = NULL;
idealreg2spillmask [Op_VecD] = NULL;
idealreg2spillmask [Op_VecX] = NULL;
@@ -97,6 +98,7 @@ Matcher::Matcher()
idealreg2debugmask [Op_RegF] = NULL;
idealreg2debugmask [Op_RegD] = NULL;
idealreg2debugmask [Op_RegP] = NULL;
+ idealreg2debugmask [Op_VecA] = NULL;
idealreg2debugmask [Op_VecS] = NULL;
idealreg2debugmask [Op_VecD] = NULL;
idealreg2debugmask [Op_VecX] = NULL;
@@ -110,6 +112,7 @@ Matcher::Matcher()
idealreg2mhdebugmask[Op_RegF] = NULL;
idealreg2mhdebugmask[Op_RegD] = NULL;
idealreg2mhdebugmask[Op_RegP] = NULL;
+ idealreg2mhdebugmask[Op_VecA] = NULL;
idealreg2mhdebugmask[Op_VecS] = NULL;
idealreg2mhdebugmask[Op_VecD] = NULL;
idealreg2mhdebugmask[Op_VecX] = NULL;
@@ -417,6 +420,8 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
return rms;
}
+#define NOF_STACK_MASKS (3*6+6) // number of RegMasks in the rms[] array of init_first_stack_mask(): rms[0..17] back the spill/debug/mhdebug tables (presumably 3 tables x 6 scalar register kinds), rms[18..23] the six vector spill masks VecA, VecS, VecD, VecX, VecY, VecZ
+
//---------------------------init_first_stack_mask-----------------------------
// Create the initial stack mask used by values spilling to the stack.
// Disallow any debug info in outgoing argument areas by setting the
@@ -424,7 +429,12 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
void Matcher::init_first_stack_mask() {
// Allocate storage for spill masks as masks for the appropriate load type.
- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5));
+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * NOF_STACK_MASKS);
+
+ // Initialize empty placeholder masks into the newly allocated arena
+ for (int i = 0; i < NOF_STACK_MASKS; i++) {
+ new (rms + i) RegMask();
+ }
idealreg2spillmask [Op_RegN] = &rms[0];
idealreg2spillmask [Op_RegI] = &rms[1];
@@ -447,11 +457,12 @@ void Matcher::init_first_stack_mask() {
idealreg2mhdebugmask[Op_RegD] = &rms[16];
idealreg2mhdebugmask[Op_RegP] = &rms[17];
- idealreg2spillmask [Op_VecS] = &rms[18];
- idealreg2spillmask [Op_VecD] = &rms[19];
- idealreg2spillmask [Op_VecX] = &rms[20];
- idealreg2spillmask [Op_VecY] = &rms[21];
- idealreg2spillmask [Op_VecZ] = &rms[22];
+ idealreg2spillmask [Op_VecA] = &rms[18];
+ idealreg2spillmask [Op_VecS] = &rms[19];
+ idealreg2spillmask [Op_VecD] = &rms[20];
+ idealreg2spillmask [Op_VecX] = &rms[21];
+ idealreg2spillmask [Op_VecY] = &rms[22];
+ idealreg2spillmask [Op_VecZ] = &rms[23];
OptoReg::Name i;
@@ -478,6 +489,7 @@ void Matcher::init_first_stack_mask() {
// Keep spill masks aligned.
aligned_stack_mask.clear_to_pairs();
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+ RegMask scalable_stack_mask = aligned_stack_mask;
*idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
#ifdef _LP64
@@ -548,28 +560,48 @@ void Matcher::init_first_stack_mask() {
*idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ];
idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask);
}
- if (UseFPUForSpilling) {
- // This mask logic assumes that the spill operations are
- // symmetric and that the registers involved are the same size.
- // On sparc for instance we may have to use 64 bit moves will
- // kill 2 registers when used with F0-F31.
- idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
- idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
+
+ if (Matcher::supports_scalable_vector()) {
+ int k = 1;
+ OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
+ // Exclude last input arg stack slots to avoid spilling vector register there,
+ // otherwise vector spills could stomp over stack slots in caller frame.
+ for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) {
+ scalable_stack_mask.Remove(in);
+ in = OptoReg::add(in, -1);
+ }
+
+ // For VecA
+ scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA);
+ assert(scalable_stack_mask.is_AllStack(), "should be infinite stack");
+ *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA];
+ idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask);
+ } else {
+ *idealreg2spillmask[Op_VecA] = RegMask::Empty;
+ }
+
+ if (UseFPUForSpilling) {
+ // This mask logic assumes that the spill operations are
+ // symmetric and that the registers involved are the same size.
+ // On sparc for instance we may have to use 64 bit moves will
+ // kill 2 registers when used with F0-F31.
+ idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
+ idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
#ifdef _LP64
- idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
- idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
- idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
- idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
+ idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
+ idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
+ idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
+ idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
#else
- idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
+ idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
#ifdef ARM
- // ARM has support for moving 64bit values between a pair of
- // integer registers and a double register
- idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
- idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
+ // ARM has support for moving 64bit values between a pair of
+ // integer registers and a double register
+ idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
+ idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
#endif
#endif
- }
+ }
// Make up debug masks. Any spill slot plus callee-save registers.
// Caller-save registers are assumed to be trashable by the various
@@ -872,6 +904,10 @@ void Matcher::init_spill_mask( Node *ret ) {
idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
// Vector regmasks.
+ if (Matcher::supports_scalable_vector()) {
+ MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA));
+ idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask();
+ }
if (Matcher::vector_size_supported(T_BYTE,4)) {
TypeVect::VECTS = TypeVect::make(T_BYTE, 4);
MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS));
@@ -1575,7 +1611,6 @@ Node* Matcher::Label_Root(const Node* n, State* svec, Node* control, Node*& mem)
}
}
-
// Call DFA to match this node, and return
svec->DFA( n->Opcode(), n );
diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp
index 244e3d1f8..ed890f88e 100644
--- a/src/hotspot/share/opto/matcher.hpp
+++ b/src/hotspot/share/opto/matcher.hpp
@@ -333,6 +333,10 @@ public:
Matcher::min_vector_size(bt) <= size);
}
+ static const bool supports_scalable_vector();
+ // Actual max scalable vector register length.
+ static const int scalable_vector_reg_size(const BasicType bt);
+
// Vector ideal reg
static const uint vector_ideal_reg(int len);
static const uint vector_shift_count_ideal_reg(int len);
diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp
index e31e8d847..1a826d8ba 100644
--- a/src/hotspot/share/opto/opcodes.cpp
+++ b/src/hotspot/share/opto/opcodes.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -38,12 +38,14 @@ const char *NodeClassNames[] = {
"RegF",
"RegD",
"RegL",
- "RegFlags",
+ "VecA",
"VecS",
"VecD",
"VecX",
"VecY",
"VecZ",
+ "RegVMask",
+ "RegFlags",
"_last_machine_leaf",
#include "classes.hpp"
"_last_class_name",
diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp
index ae3d61ce0..ec96ba055 100644
--- a/src/hotspot/share/opto/opcodes.hpp
+++ b/src/hotspot/share/opto/opcodes.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -37,11 +37,13 @@ enum Opcodes {
macro(RegF) // Machine float register
macro(RegD) // Machine double register
macro(RegL) // Machine long register
+ macro(VecA) // Machine vectora register
macro(VecS) // Machine vectors register
macro(VecD) // Machine vectord register
macro(VecX) // Machine vectorx register
macro(VecY) // Machine vectory register
macro(VecZ) // Machine vectorz register
+ macro(RegVMask) // Vector mask/predicate register
macro(RegFlags) // Machine flags register
_last_machine_leaf, // Split between regular opcodes and machine
#include "classes.hpp"
diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp
index d572ac9fe..3514b37bc 100644
--- a/src/hotspot/share/opto/postaloc.cpp
+++ b/src/hotspot/share/opto/postaloc.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -266,9 +266,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
Node *val = skip_copies(n->in(k));
if (val == x) return blk_adjust; // No progress?
- int n_regs = RegMask::num_registers(val->ideal_reg());
uint val_idx = _lrg_map.live_range_id(val);
OptoReg::Name val_reg = lrgs(val_idx).reg();
+ int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx));
// See if it happens to already be in the correct register!
// (either Phi's direct register, or the common case of the name
@@ -305,8 +305,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
}
Node *vv = value[reg];
+ // For scalable register, number of registers may be inconsistent between
+ // "val_reg" and "reg". For example, when "val" resides in register
+ // but "reg" is located in stack.
+ if (lrgs(val_idx).is_scalable()) {
+ assert(val->ideal_reg() == Op_VecA, "scalable vector register");
+ if (OptoReg::is_stack(reg)) {
+ n_regs = lrgs(val_idx).scalable_reg_slots();
+ } else {
+ n_regs = RegMask::SlotsPerVecA;
+ }
+ }
if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
- uint last = (n_regs-1); // Looking for the last part of a set
+ uint last;
+ if (lrgs(val_idx).is_scalable()) {
+ assert(val->ideal_reg() == Op_VecA, "scalable vector register");
+ // For scalable vector register, regmask is always SlotsPerVecA bits aligned
+ last = RegMask::SlotsPerVecA - 1;
+ } else {
+ last = (n_regs-1); // Looking for the last part of a set
+ }
if ((reg&last) != last) continue; // Wrong part of a set
if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value
}
@@ -591,7 +609,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
uint k;
Node *phi = block->get_node(j);
uint pidx = _lrg_map.live_range_id(phi);
- OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
+ OptoReg::Name preg = lrgs(pidx).reg();
// Remove copies remaining on edges. Check for junk phi.
Node *u = NULL;
@@ -619,7 +637,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
if( pidx ) {
value.map(preg,phi);
regnd.map(preg,phi);
- int n_regs = RegMask::num_registers(phi->ideal_reg());
+ int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx));
for (int l = 1; l < n_regs; l++) {
OptoReg::Name preg_lo = OptoReg::add(preg,-l);
value.map(preg_lo,phi);
@@ -663,7 +681,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
regnd.map(ureg, def);
// Record other half of doubles
uint def_ideal_reg = def->ideal_reg();
- int n_regs = RegMask::num_registers(def_ideal_reg);
+ int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def)));
for (int l = 1; l < n_regs; l++) {
OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
if (!value[ureg_lo] &&
@@ -707,7 +725,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
}
uint n_ideal_reg = n->ideal_reg();
- int n_regs = RegMask::num_registers(n_ideal_reg);
+ int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx));
if (n_regs == 1) {
// If Node 'n' does not change the value mapped by the register,
// then 'n' is a useless copy. Do not update the register->node
diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp
index 2e04c42eb..dd9b5476b 100644
--- a/src/hotspot/share/opto/regmask.cpp
+++ b/src/hotspot/share/opto/regmask.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "opto/ad.hpp"
+#include "opto/chaitin.hpp"
#include "opto/compile.hpp"
#include "opto/matcher.hpp"
#include "opto/node.hpp"
@@ -116,30 +117,47 @@ const RegMask RegMask::Empty(
//=============================================================================
bool RegMask::is_vector(uint ireg) {
- return (ireg == Op_VecS || ireg == Op_VecD ||
+ return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD ||
ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ );
}
int RegMask::num_registers(uint ireg) {
switch(ireg) {
case Op_VecZ:
- return 16;
+ return SlotsPerVecZ;
case Op_VecY:
- return 8;
+ return SlotsPerVecY;
case Op_VecX:
- return 4;
+ return SlotsPerVecX;
case Op_VecD:
+ return SlotsPerVecD;
case Op_RegD:
case Op_RegL:
#ifdef _LP64
case Op_RegP:
#endif
return 2;
+ case Op_VecA:
+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
+ return SlotsPerVecA;
}
// Op_VecS and the rest ideal registers.
return 1;
}
+// Number of slots used by a live range of ideal register kind 'ireg', taking
+// the location chosen for it into account: a scalable live range that was
+// assigned a stack location uses its scalable_reg_slots(), everything else
+// uses the count reported by num_registers(ireg).
+int RegMask::num_registers(uint ireg, LRG &lrg) {
+  const int mask_slots = num_registers(ireg);
+
+  // assigned is the OptoReg which was selected by the register allocator
+  const OptoReg::Name assigned = lrg.reg();
+  assert(OptoReg::is_valid(assigned), "should be valid opto register");
+
+  const bool scalable_on_stack = lrg.is_scalable() && OptoReg::is_stack(assigned);
+  return scalable_on_stack ? static_cast<int>(lrg.scalable_reg_slots()) : mask_slots;
+}
+
//------------------------------find_first_pair--------------------------------
// Find the lowest-numbered register pair in the mask. Return the
// HIGHEST register number in the pair, or BAD if no pairs.
@@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const {
return true;
}
+// Returns true when every one of the 'size' registers reg, reg-1, ...,
+// reg-size+1 is a member of this regmask; reg is the highest number of
+// the set being tested.
+bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const {
+  int offset = 0;
+  // Walk downwards from reg and stop at the first register not in the mask.
+  while (offset < size && Member(reg - offset)) {
+    offset++;
+  }
+  // The set is valid only if the walk covered all 'size' registers.
+  return offset >= size;
+}
+
// only indicies of power 2 are accessed, so index 3 is only filled in for storage.
static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 };
//------------------------------find_first_set---------------------------------
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
// Works also for size 1.
-OptoReg::Name RegMask::find_first_set(const int size) const {
- verify_sets(size);
+OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const {
+ if (lrg.is_scalable()) {
+ // For scalable vector register, regmask is SlotsPerVecA bits aligned.
+ assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets");
+ } else {
+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
+ }
for (int i = 0; i < RM_SIZE; i++) {
if (_A[i]) { // Found some bits
int bit = _A[i] & -_A[i]; // Extract low bit
@@ -325,12 +359,16 @@ bool RegMask::is_aligned_sets(const int size) const {
while (bits) { // Check bits for pairing
int bit = bits & -bits; // Extract low bit
// Low bit is not odd means its mis-aligned.
- if ((bit & low_bits_mask) == 0) return false;
+ if ((bit & low_bits_mask) == 0) {
+ return false;
+ }
// Do extra work since (bit << size) may overflow.
int hi_bit = bit << (size-1); // high bit
int set = hi_bit + ((hi_bit-1) & ~(bit-1));
// Check for aligned adjacent bits in this set
- if ((bits & set) != set) return false;
+ if ((bits & set) != set) {
+ return false;
+ }
bits -= set; // Remove this set
}
}
diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp
index c64d08795..b733b87ad 100644
--- a/src/hotspot/share/opto/regmask.hpp
+++ b/src/hotspot/share/opto/regmask.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,8 @@
#include "code/vmreg.hpp"
#include "opto/optoreg.hpp"
+class LRG;
+
// Some fun naming (textual) substitutions:
//
// RegMask::get_low_elem() ==> RegMask::find_first_elem()
@@ -95,11 +97,13 @@ public:
// requirement is internal to the allocator, and independent of any
// particular platform.
enum { SlotsPerLong = 2,
+ SlotsPerVecA = 8,
SlotsPerVecS = 1,
SlotsPerVecD = 2,
SlotsPerVecX = 4,
SlotsPerVecY = 8,
- SlotsPerVecZ = 16 };
+ SlotsPerVecZ = 16,
+ };
// A constructor only used by the ADLC output. All mask fields are filled
// in directly. Calls to this look something like RM(1,2,3,4);
@@ -204,10 +208,14 @@ public:
return false;
}
+ // Check whether the register set of the given size, whose highest
+ // register number is reg, is valid for the current regmask.
+ bool is_valid_reg(OptoReg::Name reg, const int size) const;
+
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
// Assert that the mask contains only bit sets.
- OptoReg::Name find_first_set(const int size) const;
+ OptoReg::Name find_first_set(LRG &lrg, const int size) const;
// Clear out partial bits; leave only aligned adjacent bit sets of size.
void clear_to_sets(const int size);
@@ -226,6 +234,7 @@ public:
static bool is_vector(uint ireg);
static int num_registers(uint ireg);
+ static int num_registers(uint ireg, LRG &lrg);
// Fast overlap test. Non-zero if any registers in common.
int overlap( const RegMask &rm ) const {
diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp
index 223b7a1c6..1b46cb452 100644
--- a/src/hotspot/share/opto/type.cpp
+++ b/src/hotspot/share/opto/type.cpp
@@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
#else // all other
+ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
{ Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD
{ Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX
@@ -655,6 +656,10 @@ void Type::Initialize_shared(Compile* current) {
// get_zero_type() should not happen for T_CONFLICT
_zero_type[T_CONFLICT]= NULL;
+ if (Matcher::supports_scalable_vector()) {
+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));
+ }
+
// Vector predefined types, it needs initialized _const_basic_type[].
if (Matcher::vector_size_supported(T_BYTE,4)) {
TypeVect::VECTS = TypeVect::make(T_BYTE,4);
@@ -671,6 +676,8 @@ void Type::Initialize_shared(Compile* current) {
if (Matcher::vector_size_supported(T_FLOAT,16)) {
TypeVect::VECTZ = TypeVect::make(T_FLOAT,16);
}
+
+ mreg2type[Op_VecA] = TypeVect::VECTA;
mreg2type[Op_VecS] = TypeVect::VECTS;
mreg2type[Op_VecD] = TypeVect::VECTD;
mreg2type[Op_VecX] = TypeVect::VECTX;
@@ -990,6 +997,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = {
Bad, // Tuple - handled in v-call
Bad, // Array - handled in v-call
+ Bad, // VectorA - handled in v-call
Bad, // VectorS - handled in v-call
Bad, // VectorD - handled in v-call
Bad, // VectorX - handled in v-call
@@ -1890,7 +1898,6 @@ const TypeTuple *TypeTuple::LONG_PAIR;
const TypeTuple *TypeTuple::INT_CC_PAIR;
const TypeTuple *TypeTuple::LONG_CC_PAIR;
-
//------------------------------make-------------------------------------------
// Make a TypeTuple from the range of a method signature
const TypeTuple *TypeTuple::make_range(ciSignature* sig) {
@@ -2262,6 +2269,7 @@ bool TypeAry::ary_must_be_exact() const {
//==============================TypeVect=======================================
// Convenience common pre-built types.
+const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic
const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors
const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors
const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
@@ -2272,10 +2280,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors
const TypeVect* TypeVect::make(const Type *elem, uint length) {
BasicType elem_bt = elem->array_element_basic_type();
assert(is_java_primitive(elem_bt), "only primitive types in vector");
- assert(length > 1 && is_power_of_2(length), "vector length is power of 2");
assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
int size = length * type2aelembytes(elem_bt);
switch (Matcher::vector_ideal_reg(size)) {
+ case Op_VecA:
+ return (TypeVect*)(new TypeVectA(elem, length))->hashcons();
case Op_VecS:
return (TypeVect*)(new TypeVectS(elem, length))->hashcons();
case Op_RegL:
@@ -2307,7 +2316,7 @@ const Type *TypeVect::xmeet( const Type *t ) const {
default: // All else is a mistake
typerr(t);
-
+ case VectorA:
case VectorS:
case VectorD:
case VectorX:
@@ -2362,6 +2371,8 @@ bool TypeVect::empty(void) const {
#ifndef PRODUCT
void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
switch (base()) {
+ case VectorA:
+ st->print("vectora["); break;
case VectorS:
st->print("vectors["); break;
case VectorD:
diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp
index a7eec281e..6787b947d 100644
--- a/src/hotspot/share/opto/type.hpp
+++ b/src/hotspot/share/opto/type.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -53,6 +53,7 @@ class TypeNarrowKlass;
class TypeAry;
class TypeTuple;
class TypeVect;
+class TypeVectA;
class TypeVectS;
class TypeVectD;
class TypeVectX;
@@ -87,6 +88,7 @@ public:
Tuple, // Method signature or object layout
Array, // Array types
+ VectorA, // (Scalable) Vector types for vector length agnostic
VectorS, // 32bit Vector types
VectorD, // 64bit Vector types
VectorX, // 128bit Vector types
@@ -758,6 +760,7 @@ public:
virtual const Type *xmeet( const Type *t) const;
virtual const Type *xdual() const; // Compute dual right now.
+ static const TypeVect *VECTA;
static const TypeVect *VECTS;
static const TypeVect *VECTD;
static const TypeVect *VECTX;
@@ -769,6 +772,11 @@ public:
#endif
};
+class TypeVectA : public TypeVect { // vector type for the scalable (vector-length-agnostic) VectorA base type
+ friend class TypeVect; // lets TypeVect::make() call the private constructor below
+ TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {}
+};
+
class TypeVectS : public TypeVect {
friend class TypeVect;
TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {}
@@ -1619,12 +1627,12 @@ inline const TypeAry *Type::is_ary() const {
}
inline const TypeVect *Type::is_vect() const {
- assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" );
+ assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" );
return (TypeVect*)this;
}
inline const TypeVect *Type::isa_vect() const {
- return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL;
+ return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL;
}
inline const TypePtr *Type::is_ptr() const {
--
2.19.1
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化