加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch 58.31 KB
一键复制 编辑 原始数据 按行查看 历史
郑晨卉 提交于 2024-04-11 10:45 . [Sync] Sync patch from openeuler/gcc
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216
From 4c262af8e178ac7c81b32be5b159b4d09a5841c9 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Fri, 8 Mar 2024 07:07:50 +0800
Subject: [PATCH 1/2] Port fixes for IPA prefetch to GCC 12
---
gcc/ipa-devirt.cc | 9 +-
gcc/ipa-prefetch.cc | 174 +-
gcc/ipa-sra.cc | 7 +
gcc/params.opt | 4 +-
gcc/testsuite/gcc.dg/completion-1.c | 1 +
gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c | 1843 ++++++++++++++++++++
6 files changed, 1974 insertions(+), 64 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c
diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
index dd3562d56..dd000b401 100644
--- a/gcc/ipa-devirt.cc
+++ b/gcc/ipa-devirt.cc
@@ -5029,9 +5029,12 @@ analyze_assign_stmt (gimple *stmt)
}
else
{
- fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
- get_tree_code_name (TREE_CODE (rhs)));
- print_gimple_stmt (dump_file, stmt, 0);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
+ get_tree_code_name (TREE_CODE (rhs)));
+ print_gimple_stmt (dump_file, stmt, 0);
+ }
gcc_unreachable ();
}
}
diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
index aeea51105..9537e4835 100644
--- a/gcc/ipa-prefetch.cc
+++ b/gcc/ipa-prefetch.cc
@@ -167,6 +167,7 @@ analyse_cgraph ()
}
/* TODO: maybe remove loop info here. */
+ n->get_body ();
push_cfun (DECL_STRUCT_FUNCTION (n->decl));
calculate_dominance_info (CDI_DOMINATORS);
loop_optimizer_init (LOOPS_NORMAL);
@@ -942,6 +943,9 @@ compare_memrefs (memref_t* mr, memref_t* mr2)
(*mr_candidate_map)[mr] = mr2;
return;
}
+ /* Probably we shouldn't leave nulls in the map. */
+ if ((*mr_candidate_map)[mr] == NULL)
+ return;
/* TODO: support analysis with incrementation of different fields. */
if ((*mr_candidate_map)[mr]->offset != mr2->offset)
{
@@ -1090,6 +1094,15 @@ analyse_loops ()
memref_t *mr = it->first, *mr2 = it->second;
if (mr2 == NULL || !(*fmrs_map)[fn]->count (mr))
continue;
+ /* For now, optimize only MRs whose mem is a MEM_REF.
+ TODO: support other MR types. */
+ if (TREE_CODE (mr->mem) != MEM_REF)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Skip MR %d: unsupported tree code = %s\n",
+ mr->mr_id, get_tree_code_name (TREE_CODE (mr->mem)));
+ continue;
+ }
if (!optimize_mrs_map->count (fn))
(*optimize_mrs_map)[fn] = new memref_set;
(*optimize_mrs_map)[fn]->insert (mr);
@@ -1102,7 +1115,7 @@ analyse_loops ()
it != (*optimize_mrs_map)[fn]->end (); it++)
{
memref_t *mr = *it, *mr2 = (*mr_candidate_map)[mr];
- fprintf (dump_file, "MRs %d,%d with incremental offset ",
+ fprintf (dump_file, "MRs %d, %d with incremental offset ",
mr->mr_id, mr2->mr_id);
print_generic_expr (dump_file, mr2->offset);
fprintf (dump_file, "\n");
@@ -1435,6 +1448,52 @@ remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data)
return NULL_TREE;
}
+/* Return a copy of STMT whose operands were remapped by remap_gimple_op_r. */
+
+static gimple *
+gimple_copy_and_remap (gimple *stmt)
+{
+ gimple *copy = gimple_copy (stmt);
+ gcc_checking_assert (!is_gimple_debug (copy)); /* Debug stmts are not expected here. */
+
+ /* Remap all the operands in COPY. */
+ struct walk_stmt_info wi;
+ memset (&wi, 0, sizeof (wi));
+ wi.info = copy; /* The copy itself is passed to the callback via the walk info. */
+ walk_gimple_op (copy, remap_gimple_op_r, &wi);
+ if (dump_file) /* Dump the remapped copy whenever a dump file is open. */
+ {
+ fprintf (dump_file, "Stmt copy after remap:\n");
+ print_gimple_stmt (dump_file, copy, 0);
+ }
+ return copy;
+}
+
+/* Copy and remap the stmts of MR from the last one down to index LAST_IDX, skipping those already in PROCESSED.
+ Append each copy to STMTS, record the original in PROCESSED and return the last copy made (NULL if none was made). */
+
+static gimple *
+gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts,
+ int last_idx, stmt_set &processed)
+{
+ gimple *last_stmt = NULL; /* Stays NULL when every stmt in range was already processed. */
+ for (int i = mr->stmts.length () - 1; i >= last_idx ; i--) /* Walk from the highest index down to LAST_IDX. */
+ {
+ if (processed.count (mr->stmts[i])) /* Copied earlier; do not duplicate it again. */
+ continue;
+ processed.insert (mr->stmts[i]);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Copy stmt %d from used MR (%d):\n",
+ i, mr->mr_id);
+ print_gimple_stmt (dump_file, mr->stmts[i], 0);
+ }
+ last_stmt = gimple_copy_and_remap (mr->stmts[i]);
+ gimple_seq_add_stmt (&stmts, last_stmt); /* Copies are appended in the order they are made. */
+ }
+ return last_stmt;
+}
+
static void
create_cgraph_edge (cgraph_node *n, gimple *stmt)
{
@@ -1490,6 +1549,13 @@ optimize_function (cgraph_node *n, function *fn)
"Skip the case.\n");
return 0;
}
+ if (!tree_fits_shwi_p (inc_mr->step))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Cannot represent incremental MR's step as "
+ "integer. Skip the case.\n");
+ return 0;
+ }
if (dump_file && !used_mrs.empty ())
print_mrs_ids (used_mrs, "Common list of used mrs:\n");
@@ -1539,16 +1605,44 @@ optimize_function (cgraph_node *n, function *fn)
return 0;
}
else if (dump_file)
- fprintf (dump_file, "Dominator bb %d for MRs\n", dom_bb->index);
+ {
+ fprintf (dump_file, "Dominator bb %d for MRs:\n", dom_bb->index);
+ gimple_dump_bb (dump_file, dom_bb, 0, dump_flags);
+ fprintf (dump_file, "\n");
+ }
- split_block (dom_bb, (gimple *) NULL);
+ /* Try to find comp_mr's stmt in the dominator bb. */
+ gimple *last_used = NULL;
+ for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si);
+ gsi_prev (&si))
+ if (comp_mr->stmts[0] == gsi_stmt (si))
+ {
+ last_used = gsi_stmt (si);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Last used stmt in dominator bb:\n");
+ print_gimple_stmt (dump_file, last_used, 0);
+ }
+ break;
+ }
+
+ split_block (dom_bb, last_used);
gimple_stmt_iterator gsi = gsi_last_bb (dom_bb);
/* Create new inc var. Insert new_var = old_var + step * factor. */
decl_map = new tree_map;
gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0]));
tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]);
+ /* If old_var definition dominates the current use, just use it, otherwise
+ evaluate it just before new inc var evaluation. */
gimple_seq stmts = NULL;
+ stmt_set processed_stmts;
+ if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0])))
+ {
+ gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0,
+ processed_stmts);
+ inc_var = gimple_assign_lhs (tmp);
+ }
tree var_type = TREE_TYPE (inc_var);
enum tree_code inc_code;
if (TREE_CODE (var_type) == POINTER_TYPE)
@@ -1556,52 +1650,28 @@ optimize_function (cgraph_node *n, function *fn)
else
inc_code = PLUS_EXPR;
tree step = inc_mr->step;
- unsigned dist_val = tree_to_uhwi (step) * param_ipa_prefetch_distance_factor;
+ HOST_WIDE_INT dist_val = tree_to_shwi (step)
+ * param_ipa_prefetch_distance_factor;
tree dist = build_int_cst (TREE_TYPE (step), dist_val);
tree new_inc_var = gimple_build (&stmts, inc_code, var_type, inc_var, dist);
(*decl_map)[inc_var] = new_inc_var;
+ if (dump_file) /* DIST_VAL is a HOST_WIDE_INT, which is not 'long' on every host, so use the matching format macro. */
+ {
+ fprintf (dump_file, "New distance value: " HOST_WIDE_INT_PRINT_DEC ", new inc var: ", dist_val);
+ print_generic_expr (dump_file, new_inc_var);
+ fprintf (dump_file, "\n");
+ }
/* Create other new vars. Insert new stmts. */
- struct walk_stmt_info wi;
- stmt_set processed_stmts;
- memref_tree_map mr_new_trees;
for (memref_set::const_iterator it = used_mrs.begin ();
it != used_mrs.end (); it++)
{
memref_t *mr = *it;
- gimple *last_stmt = NULL;
if (mr == comp_mr)
continue;
- for (int i = mr->stmts.length () - 1; i >= 0 ; i--)
- {
- if (processed_stmts.count (mr->stmts[i]))
- continue;
- processed_stmts.insert (mr->stmts[i]);
- if (dump_file)
- {
- fprintf (dump_file, "Copy stmt %d from used MR (%d):\n",
- i, mr->mr_id);
- print_gimple_stmt (dump_file, mr->stmts[i], 0);
- }
- /* Create a new copy of STMT and duplicate STMT's virtual
- operands. */
- gimple *copy = gimple_copy (mr->stmts[i]);
- gcc_checking_assert (!is_gimple_debug (copy));
-
- /* Remap all the operands in COPY. */
- memset (&wi, 0, sizeof (wi));
- last_stmt = copy;
- wi.info = copy;
- walk_gimple_op (copy, remap_gimple_op_r, &wi);
- if (dump_file)
- {
- fprintf (dump_file, "Stmt %d after remap:\n",i);
- print_gimple_stmt (dump_file, copy, 0);
- }
- gimple_seq_add_stmt (&stmts, copy);
- }
+ gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0,
+ processed_stmts);
gcc_assert (last_stmt);
- mr_new_trees[mr] = gimple_assign_lhs (last_stmt);
if (dump_file)
{
fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id);
@@ -1637,29 +1707,9 @@ optimize_function (cgraph_node *n, function *fn)
memref_t *mr = vmrs[j];
/* Don't need to copy the last stmt, since we insert prefetch insn
instead of it. */
- for (int i = mr->stmts.length () - 1; i >= 1 ; i--)
- {
- if (processed_stmts.count (mr->stmts[i]))
- continue;
- processed_stmts.insert (mr->stmts[i]);
-
- gimple *copy = gimple_copy (mr->stmts[i]);
- gcc_checking_assert (!is_gimple_debug (copy));
-
- /* Remap all the operands in COPY. */
- memset (&wi, 0, sizeof (wi));
- wi.info = copy;
- walk_gimple_op (copy, remap_gimple_op_r, &wi);
- if (dump_file)
- {
- fprintf (dump_file, "Stmt %d after remap:\n",i);
- print_gimple_stmt (dump_file, copy, 0);
- }
- gimple_seq_add_stmt (&stmts, copy);
- }
+ gimple_copy_and_remap_memref_stmts (mr, stmts, 1, processed_stmts);
gimple *last_stmt = mr->stmts[0];
gcc_assert (last_stmt);
- mr_new_trees[mr] = gimple_assign_lhs (last_stmt);
tree write_p = mr->is_store ? integer_one_node : integer_zero_node;
tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE);
if (decl_map->count (addr))
@@ -1668,6 +1718,11 @@ optimize_function (cgraph_node *n, function *fn)
3, addr, write_p, local);
pcalls.safe_push (last_stmt);
gimple_seq_add_stmt (&stmts, last_stmt);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Insert %d prefetch stmt:\n", j);
+ print_gimple_stmt (dump_file, last_stmt, 0);
+ }
}
gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
@@ -1677,6 +1732,7 @@ optimize_function (cgraph_node *n, function *fn)
for (unsigned i = 0; i < pcalls.length (); i++)
create_cgraph_edge (n, pcalls[i]);
ipa_update_overall_fn_summary (n);
+ renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (n->decl));
return 1;
}
@@ -1806,7 +1862,7 @@ pass_ipa_prefetch::gate (function *)
/* Don't bother doing anything if the program has errors. */
&& !seen_error ()
&& flag_lto_partition == LTO_PARTITION_ONE
- /* Only enable struct optimizations in lto or whole_program. */
+ /* Only enable prefetch optimizations in lto or whole_program. */
&& (in_lto_p || flag_whole_program));
}
diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
index 5355cf2f4..471b3927c 100644
--- a/gcc/ipa-sra.cc
+++ b/gcc/ipa-sra.cc
@@ -3393,6 +3393,13 @@ param_splitting_across_edge (cgraph_edge *cs)
gcc_checking_assert (from_ifs && from_ifs->m_parameters);
isra_call_summary *csum = call_sums->get (cs);
+ /* TODO: implement better support for call edges inserted after summary
+ collection but before sra wpa invocation. */
+ if (!csum)
+ {
+ csum = call_sums->get_create (cs);
+ csum->m_return_ignored = true;
+ }
gcc_checking_assert (csum);
unsigned args_count = csum->m_arg_flow.length ();
isra_func_summary *to_ifs = func_sums->get (callee);
diff --git a/gcc/params.opt b/gcc/params.opt
index 5c07e3986..50385dfd7 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -314,8 +314,8 @@ Common Joined UInteger Var(param_ipa_prefetch_distance_factor) Init(4) Param Opt
The factor represents the number of inductive variable incrementations to evaluate an indirect memory address for IPA prefetch.
-param=ipa-prefetch-locality=
-Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) Param Optimization
-The flag represents temporal locality values in the following way: 0:pstl1strm, 1:pstl3keep, 2:pstl2keep, 3:pstl1keep.
+Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) IntegerRange(0, 3) Param Optimization
+The flag represents temporal locality value between 0 and 3, the higher value means the higher temporal locality in the data.
-param=ira-loop-reserved-regs=
Common Joined UInteger Var(param_ira_loop_reserved_regs) Init(2) Param Optimization
diff --git a/gcc/testsuite/gcc.dg/completion-1.c b/gcc/testsuite/gcc.dg/completion-1.c
index 64da64f1c..df2319c76 100644
--- a/gcc/testsuite/gcc.dg/completion-1.c
+++ b/gcc/testsuite/gcc.dg/completion-1.c
@@ -2,6 +2,7 @@
/* { dg-options "--completion=-fipa-ic" } */
/* { dg-begin-multiline-output "" }
+-fipa-ic
-fipa-icf
-fipa-icf-functions
-fipa-icf-variables
diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c
new file mode 100644
index 000000000..bd4fb2bdc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-xz.c
@@ -0,0 +1,1843 @@
+/* { dg-do link } */
+/* { dg-options "-O3 -fipa-ic -fipa-prefetch -flto -flto-partition=one -fdump-ipa-ipa_prefetch -fdump-ipa-icp" } */
+/* { dg-require-effective-target lto } */
+
+/* Based on opensource xz code. */
+
+#include <stdlib.h>
+#include <string.h>
+
+typedef long int ptrdiff_t;
+typedef long unsigned int size_t;
+typedef unsigned int wchar_t;
+
+typedef unsigned char __u_char;
+typedef unsigned short int __u_short;
+typedef unsigned int __u_int;
+typedef unsigned long int __u_long;
+
+typedef signed char __int8_t;
+typedef unsigned char __uint8_t;
+typedef signed short int __int16_t;
+typedef unsigned short int __uint16_t;
+typedef signed int __int32_t;
+typedef unsigned int __uint32_t;
+
+typedef signed long int __int64_t;
+typedef unsigned long int __uint64_t;
+
+typedef __int8_t __int_least8_t;
+typedef __uint8_t __uint_least8_t;
+typedef __int16_t __int_least16_t;
+typedef __uint16_t __uint_least16_t;
+typedef __int32_t __int_least32_t;
+typedef __uint32_t __uint_least32_t;
+typedef __int64_t __int_least64_t;
+typedef __uint64_t __uint_least64_t;
+
+typedef __int8_t int8_t;
+typedef __int16_t int16_t;
+typedef __int32_t int32_t;
+typedef __int64_t int64_t;
+
+typedef __uint8_t uint8_t;
+typedef __uint16_t uint16_t;
+typedef __uint32_t uint32_t;
+typedef __uint64_t uint64_t;
+
+typedef long int intptr_t;
+typedef unsigned long int uintptr_t;
+
+static inline uint16_t
+read16ne(const uint8_t *buf)
+{ /* Return the uint16_t whose object representation is the first sizeof(uint16_t) bytes at BUF. */
+ uint16_t num;
+ memcpy(&num, buf, sizeof(num)); /* Byte-wise copy, so BUF needs no particular alignment. */
+ return num;
+}
+
+static inline uint32_t
+read32ne(const uint8_t *buf)
+{ /* Return the uint32_t whose object representation is the first sizeof(uint32_t) bytes at BUF. */
+ uint32_t num;
+ memcpy(&num, buf, sizeof(num)); /* Byte-wise copy, so BUF needs no particular alignment. */
+ return num;
+}
+
+static inline uint16_t
+aligned_read16ne(const uint8_t *buf)
+{ /* Same load as a plain memcpy read of a uint16_t, but BUF is asserted to be aligned. */
+ uint16_t num;
+ memcpy(&num, __builtin_assume_aligned(buf, sizeof(num)), sizeof(num)); /* Tells the compiler BUF is aligned to sizeof(num) bytes. */
+ return num;
+}
+
+
+static inline uint32_t
+aligned_read32ne(const uint8_t *buf)
+{ /* Same load as a plain memcpy read of a uint32_t, but BUF is asserted to be aligned. */
+ uint32_t num;
+ memcpy(&num, __builtin_assume_aligned(buf, sizeof(num)), sizeof(num)); /* Tells the compiler BUF is aligned to sizeof(num) bytes. */
+ return num;
+}
+
+static inline uint64_t
+aligned_read64ne(const uint8_t *buf)
+{ /* Same load as a plain memcpy read of a uint64_t, but BUF is asserted to be aligned. */
+ uint64_t num;
+ memcpy(&num, __builtin_assume_aligned(buf, sizeof(num)), sizeof(num)); /* Tells the compiler BUF is aligned to sizeof(num) bytes. */
+ return num;
+}
+
+typedef unsigned char lzma_bool;
+
+typedef enum {
+ LZMA_RESERVED_ENUM = 0
+} lzma_reserved_enum;
+
+typedef enum {
+ LZMA_OK = 0,
+ LZMA_STREAM_END = 1,
+ LZMA_NO_CHECK = 2,
+ LZMA_UNSUPPORTED_CHECK = 3,
+ LZMA_GET_CHECK = 4,
+ LZMA_MEM_ERROR = 5,
+ LZMA_MEMLIMIT_ERROR = 6,
+ LZMA_FORMAT_ERROR = 7,
+ LZMA_OPTIONS_ERROR = 8,
+ LZMA_DATA_ERROR = 9,
+ LZMA_BUF_ERROR = 10,
+ LZMA_PROG_ERROR = 11,
+} lzma_ret;
+
+typedef enum {
+ LZMA_RUN = 0,
+ LZMA_SYNC_FLUSH = 1,
+ LZMA_FULL_FLUSH = 2,
+ LZMA_FULL_BARRIER = 4,
+ LZMA_FINISH = 3
+} lzma_action;
+
+typedef struct {
+ void *( *alloc)(void *opaque, size_t nmemb, size_t size);
+
+ void ( *free)(void *opaque, void *ptr);
+
+ void *opaque;
+} lzma_allocator;
+
+typedef uint64_t lzma_vli;
+
+typedef enum {
+ LZMA_CHECK_NONE = 0,
+ LZMA_CHECK_CRC32 = 1,
+ LZMA_CHECK_CRC64 = 4,
+ LZMA_CHECK_SHA256 = 10
+} lzma_check;
+
+typedef struct {
+ lzma_vli id;
+ void *options;
+} lzma_filter;
+
+typedef enum {
+ LZMA_MF_HC3 = 0x03,
+ LZMA_MF_HC4 = 0x04,
+ LZMA_MF_BT2 = 0x12,
+ LZMA_MF_BT3 = 0x13,
+ LZMA_MF_BT4 = 0x14
+} lzma_match_finder;
+
+typedef struct lzma_next_coder_s lzma_next_coder;
+
+typedef struct lzma_filter_info_s lzma_filter_info;
+
+typedef lzma_ret (*lzma_init_function)(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+typedef lzma_ret (*lzma_code_function)(
+ void *coder, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size,
+ lzma_action action);
+
+typedef void (*lzma_end_function)(
+ void *coder, const lzma_allocator *allocator);
+
+struct lzma_filter_info_s {
+ lzma_vli id;
+ lzma_init_function init;
+ void *options;
+};
+
+struct lzma_next_coder_s {
+ void *coder;
+ lzma_vli id;
+ uintptr_t init;
+
+ lzma_code_function code;
+ lzma_end_function end;
+ void (*get_progress)(void *coder,
+ uint64_t *progress_in, uint64_t *progress_out);
+
+ lzma_check (*get_check)(const void *coder);
+ lzma_ret (*memconfig)(void *coder, uint64_t *memusage,
+ uint64_t *old_memlimit, uint64_t new_memlimit);
+ lzma_ret (*update)(void *coder, const lzma_allocator *allocator,
+ const lzma_filter *filters, const lzma_filter *reversed_filters);
+};
+
+typedef struct {
+ uint32_t len;
+ uint32_t dist;
+} lzma_match;
+
+typedef struct lzma_mf_s lzma_mf;
+struct lzma_mf_s { /* Match finder state; field notes below describe only how this file uses each field. */
+ uint8_t *buffer; /* Base pointer; mf_ptr() returns buffer + read_pos. */
+ uint32_t size;
+ uint32_t keep_size_before;
+ uint32_t keep_size_after;
+ uint32_t offset; /* Added to read_pos to form the position stored in hash; reduced by normalize(). */
+ uint32_t read_pos; /* Index into buffer of the current position. */
+ uint32_t read_ahead; /* Incremented by lzma_mf_find(). */
+ uint32_t read_limit;
+ uint32_t write_pos; /* mf_avail() returns write_pos - read_pos. */
+ uint32_t pending; /* Incremented by move_pending(). */
+ uint32_t (*find)(lzma_mf *mf, lzma_match *matches); /* Called by lzma_mf_find(). */
+ void (*skip)(lzma_mf *mf, uint32_t num);
+ uint32_t *hash; /* Table indexed by hash values; entries hold positions. */
+ uint32_t *son; /* Chain array indexed by cyclic position. */
+ uint32_t cyclic_pos; /* Wraps to 0 on reaching cyclic_size (see move_pos()). */
+ uint32_t cyclic_size;
+ uint32_t hash_mask; /* Mask applied to the main hash value. */
+ uint32_t depth; /* Passed to hc_find_func() as the number of chain steps to try. */
+ uint32_t nice_len; /* Upper bound applied to len_limit in the find functions. */
+ uint32_t match_len_max; /* Caps the extension limit in lzma_mf_find(). */
+ lzma_action action;
+ uint32_t hash_count; /* Number of hash entries visited by normalize(). */
+ uint32_t sons_count; /* Number of son entries visited by normalize(). */
+};
+
+typedef struct {
+ size_t before_size;
+ size_t dict_size;
+ size_t after_size;
+ size_t match_len_max;
+ size_t nice_len;
+ lzma_match_finder match_finder;
+ uint32_t depth;
+ const uint8_t *preset_dict;
+ uint32_t preset_dict_size;
+} lzma_lz_options;
+
+typedef struct {
+ void *coder;
+ lzma_ret (*code)(void *coder,
+ lzma_mf *restrict mf, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size);
+ void (*end)(void *coder, const lzma_allocator *allocator);
+ lzma_ret (*options_update)(void *coder, const lzma_filter *filter);
+} lzma_lz_encoder;
+
+static inline const uint8_t *
+mf_ptr(const lzma_mf *mf)
+{ /* Return a pointer to the byte at the current read position of MF's buffer. */
+ return mf->buffer + mf->read_pos;
+}
+
+static inline uint32_t
+mf_avail(const lzma_mf *mf)
+{ /* Return write_pos - read_pos, computed in unsigned 32-bit arithmetic. */
+ return mf->write_pos - mf->read_pos;
+}
+
+typedef struct {
+ uint32_t state[8];
+ uint64_t size;
+} lzma_sha256_state;
+
+typedef struct {
+ union {
+ uint8_t u8[64];
+ uint32_t u32[16];
+ uint64_t u64[8];
+ } buffer;
+ union {
+ uint32_t crc32;
+ uint64_t crc64;
+ lzma_sha256_state sha256;
+ } state;
+} lzma_check_state;
+
+// In the original code this table has a constant initializer.
+// The initializer is omitted in this test.
+const uint32_t lzma_crc32_table[8][256];
+
+static inline uint32_t __attribute__((__always_inline__))
+lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
+ uint32_t len, uint32_t limit)
+{ /* Starting at offset LEN, compare BUF1 and BUF2 four bytes at a time; return the matching length, never more than LIMIT. */
+ while (len < limit) {
+ uint32_t x = read32ne(buf1 + len) - read32ne(buf2 + len); /* Zero iff the two 4-byte words are equal. */
+ if (x != 0) {
+ if ((x & 0xFFFF) == 0) { /* Low 16 bits equal: count two more bytes and look at the upper half. */
+ len += 2;
+ x >>= 16;
+ }
+
+ if ((x & 0xFF) == 0) /* NOTE(review): treating low-order bits as the first bytes presumably assumes a little-endian host -- confirm. */
+ ++len;
+
+ return ((len) < (limit) ? (len) : (limit)); /* Clamp: the 4-byte reads can count past LIMIT. */
+ }
+
+ len += 4;
+ }
+
+ return limit;
+}
+
+extern uint32_t
+lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches)
+{ /* Call mf->find, store the match count in *COUNT_PTR and return the length of the last match (possibly extended), or 0 if there is none. */
+ const uint32_t count = mf->find(mf, matches);
+ uint32_t len_best = 0;
+
+ if (count > 0) {
+ len_best = matches[count - 1].len; /* Start from the last reported match. */
+ if (len_best == mf->nice_len) { /* Only a match of exactly nice_len is extended further. */
+ uint32_t limit = mf_avail(mf) + 1; /* NOTE(review): the +1 here and -1 below presumably compensate for find() having advanced read_pos -- confirm. */
+ if (limit > mf->match_len_max)
+ limit = mf->match_len_max;
+ const uint8_t *p1 = mf_ptr(mf) - 1; /* One byte before the current read position. */
+ const uint8_t *p2 = p1 - matches[count - 1].dist - 1; /* dist + 1 bytes behind p1. */
+ len_best = lzma_memcmplen(p1, p2, len_best, limit);
+ }
+ }
+
+ *count_ptr = count;
+ ++mf->read_ahead;
+
+ return len_best;
+}
+
+static void
+normalize(lzma_mf *mf)
+{ /* Lower every hash and son entry, and mf->offset, by a common value; entries not above that value become 0. */
+ const uint32_t subvalue = ((4294967295U) - mf->cyclic_size); /* 4294967295U is 2^32 - 1. */
+
+ for (uint32_t i = 0; i < mf->hash_count; ++i) {
+ if (mf->hash[i] <= subvalue) /* Clamp instead of wrapping below zero. */
+ mf->hash[i] = 0;
+ else
+ mf->hash[i] -= subvalue;
+ }
+
+ for (uint32_t i = 0; i < mf->sons_count; ++i) { /* Same treatment for the son array. */
+ if (mf->son[i] <= subvalue)
+ mf->son[i] = 0;
+ else
+ mf->son[i] -= subvalue;
+ }
+
+ mf->offset -= subvalue; /* Keep offset consistent with the shifted entries. */
+ return;
+}
+
+static void
+move_pos(lzma_mf *mf)
+{ /* Advance cyclic_pos (with wrap-around) and read_pos by one; call normalize() when read_pos + offset hits 2^32 - 1. */
+ if (++mf->cyclic_pos == mf->cyclic_size) /* Wrap the cyclic position back to 0. */
+ mf->cyclic_pos = 0;
+ ++mf->read_pos;
+ if (__builtin_expect(mf->read_pos + mf->offset == (4294967295U), 0 )) /* Hinted to the compiler as the unlikely case. */
+ normalize(mf);
+}
+
+static void
+move_pending(lzma_mf *mf)
+{ /* Advance read_pos by one and count the position in mf->pending; cyclic_pos and the tables are left untouched. */
+ ++mf->read_pos;
+ ++mf->pending;
+}
+
+static lzma_match *
+hc_find_func(
+ const uint32_t len_limit,
+ const uint32_t pos,
+ const uint8_t *const cur,
+ uint32_t cur_match,
+ uint32_t depth,
+ uint32_t *const son,
+ const uint32_t cyclic_pos,
+ const uint32_t cyclic_size,
+ lzma_match *matches,
+ uint32_t len_best)
+{ /* Follow the chain in SON starting at CUR_MATCH for at most DEPTH steps, appending each strictly longer match to MATCHES; return one past the last entry written. */
+ son[cyclic_pos] = cur_match; /* Record CUR_MATCH in the slot of the current cyclic position. */
+
+ while (1) {
+ const uint32_t delta = pos - cur_match; /* Distance back from POS to the candidate. */
+ if (depth-- == 0 || delta >= cyclic_size) /* Stop when out of steps or the candidate is cyclic_size or more behind. */
+ return matches;
+
+ const uint8_t *const pb = cur - delta; /* Bytes at the candidate position. */
+ cur_match = son[cyclic_pos - delta
+ + (delta > cyclic_pos ? cyclic_size : 0)]; /* Next candidate; the index wraps within the cyclic array. */
+
+ if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { /* Cheap two-byte filter before the full comparison. */
+ uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit);
+
+ if (len_best < len) { /* Only record matches longer than the best so far. */
+ len_best = len;
+ matches->len = len;
+ matches->dist = delta - 1; /* Distances are stored minus one. */
+ ++matches;
+
+ if (len == len_limit) /* Cannot get longer than the limit. */
+ return matches;
+ }
+ }
+ }
+}
+
+extern uint32_t
+lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches)
+{ /* Find matches at the current position using 2-byte and 3-byte hashes; fill MATCHES, advance MF by one position and return the number of matches. */
+ uint32_t len_limit = mf_avail(mf);
+ if (mf->nice_len <= len_limit) { /* Never look for matches longer than nice_len. */
+ len_limit = mf->nice_len;
+ } else if (len_limit < (3)) { /* Fewer than 3 bytes available: just step over the position. */
+ move_pending(mf);
+ return 0;
+ }
+ const uint8_t *cur = mf_ptr(mf);
+ const uint32_t pos = mf->read_pos + mf->offset; /* Position value stored in the hash table. */
+ uint32_t matches_count = 0;
+
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; /* Mixes the first two bytes. */
+ const uint32_t hash_2_value = temp & ((1U << 10) - 1); /* 10-bit hash of two bytes. */
+ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask; /* Three-byte hash. */
+
+ const uint32_t delta2 = pos - mf->hash[hash_2_value];
+ const uint32_t cur_match = mf->hash[((1U << 10)) + hash_value]; /* Three-byte entries start at index 1 << 10. */
+
+ mf->hash[hash_2_value] = pos; /* Both entries now refer to the current position. */
+ mf->hash[((1U << 10)) + hash_value] = pos;
+
+ uint32_t len_best = 2;
+
+ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { /* The 2-byte hash candidate is in range and its first byte agrees. */
+ len_best = lzma_memcmplen(cur - delta2, cur, len_best, len_limit);
+
+ matches[0].len = len_best;
+ matches[0].dist = delta2 - 1;
+ matches_count = 1;
+
+ if (len_best == len_limit) { /* Already at the limit: skip the chain search. */
+ mf->son[mf->cyclic_pos] = cur_match;
+ move_pos(mf);
+ return 1;
+ }
+ }
+
+ matches_count = hc_find_func(len_limit, pos, cur, cur_match, mf->depth,
+ mf->son, mf->cyclic_pos, mf->cyclic_size,
+ matches + matches_count, len_best) - matches; /* Pointer difference gives the total match count. */
+ move_pos(mf);
+ return matches_count;
+}
+
+extern void
+lzma_mf_hc3_skip(lzma_mf *mf, uint32_t amount)
+{ /* Advance MF by AMOUNT positions, updating the hash entries and son array but reporting no matches. */
+ do {
+ if (mf_avail(mf) < 3) { /* Too few bytes to hash: step over the position. */
+ move_pending(mf);
+ continue; /* In a do-while this jumps to the --amount test. */
+ }
+
+ const uint8_t *cur = mf_ptr(mf);
+ const uint32_t pos = mf->read_pos + mf->offset;
+
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
+ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
+ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask;
+
+ const uint32_t cur_match
+ = mf->hash[((1U << 10)) + hash_value]; /* Read the old entry before it is overwritten below. */
+
+ mf->hash[hash_2_value] = pos;
+ mf->hash[((1U << 10)) + hash_value] = pos;
+
+ do { mf->son[mf->cyclic_pos] = cur_match; move_pos(mf); } while (0); /* Record the old entry in son, then advance. */
+
+ } while (--amount != 0); /* NOTE(review): an AMOUNT of 0 would wrap around here; presumably callers pass at least 1 -- confirm. */
+}
+
+extern uint32_t
+lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches)
+{ /* Find matches at the current position using 2-, 3- and 4-byte hashes; fill MATCHES, advance MF by one position and return the number of matches. */
+ uint32_t len_limit = mf_avail(mf);
+ if (mf->nice_len <= len_limit) { /* Never look for matches longer than nice_len. */
+ len_limit = mf->nice_len;
+ } else if (len_limit < (4)) { /* Fewer than 4 bytes available: just step over the position. */
+ move_pending(mf);
+ return 0;
+ }
+ const uint8_t *cur = mf_ptr(mf);
+ const uint32_t pos = mf->read_pos + mf->offset; /* Position value stored in the hash table. */
+ uint32_t matches_count = 0;
+
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; /* Mixes the first two bytes. */
+ const uint32_t hash_2_value = temp & ((1U << 10) - 1); /* 10-bit hash of two bytes. */
+ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8))
+ & ((1U << 16) - 1); /* 16-bit hash of three bytes. */
+ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)
+ ^ (lzma_crc32_table[0][cur[3]] << 5))
+ & mf->hash_mask; /* Four-byte hash. */
+ uint32_t delta2 = pos - mf->hash[hash_2_value];
+ const uint32_t delta3
+ = pos - mf->hash[((1U << 10)) + hash_3_value]; /* Three-byte entries start at index 1 << 10. */
+ const uint32_t cur_match = mf->hash[((1U << 10) + (1U << 16)) + hash_value]; /* Four-byte entries follow the 2- and 3-byte regions. */
+
+ mf->hash[hash_2_value ] = pos; /* All three entries now refer to the current position. */
+ mf->hash[((1U << 10)) + hash_3_value] = pos;
+ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos;
+
+ uint32_t len_best = 1;
+
+ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { /* The 2-byte hash candidate is in range and its first byte agrees. */
+ len_best = 2;
+ matches[0].len = 2;
+ matches[0].dist = delta2 - 1;
+ matches_count = 1;
+ }
+
+ if (delta2 != delta3 && delta3 < mf->cyclic_size
+ && *(cur - delta3) == *cur) { /* A distinct 3-byte hash candidate. */
+ len_best = 3;
+ matches[matches_count++].dist = delta3 - 1; /* Its .len is filled in by the block below. */
+ delta2 = delta3; /* Extend from the 3-byte candidate instead. */
+ }
+
+ if (matches_count != 0) {
+ len_best = lzma_memcmplen(cur - delta2, cur,
+ len_best, len_limit);
+
+ matches[matches_count - 1].len = len_best;
+
+ if (len_best == len_limit) { /* Already at the limit: skip the chain search. */
+ mf->son[mf->cyclic_pos] = cur_match; move_pos(mf);
+ return matches_count;
+ }
+ }
+
+ if (len_best < 3) /* The chain search only records matches longer than len_best, so this sets its minimum to 4. */
+ len_best = 3;
+
+ matches_count = hc_find_func(len_limit, pos, cur, cur_match, mf->depth,
+ mf->son, mf->cyclic_pos, mf->cyclic_size,
+ matches + matches_count, len_best) - matches; /* Pointer difference gives the total match count. */
+ move_pos(mf);
+ return matches_count;
+}
+/* Advance the HC4 match finder by `amount` positions, updating all three hash heads and son[] without reporting matches. */
+extern void
+lzma_mf_hc4_skip(lzma_mf *mf, uint32_t amount)
+{
+ do {
+ if (mf_avail(mf) < 4) { /* mf_avail() is below the 4 bytes hashed below */
+ move_pending(mf);
+ continue; /* in a do-while this jumps to the --amount test */
+ }
+
+ const uint8_t *cur = mf_ptr(mf);
+ const uint32_t pos = mf->read_pos + mf->offset;
+
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
+ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
+ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & ((1U << 16) - 1);
+ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)
+ ^ (lzma_crc32_table[0][cur[3]] << 5))
+ & mf->hash_mask;
+
+ const uint32_t cur_match
+ = mf->hash[((1U << 10) + (1U << 16)) + hash_value]; /* 4-byte heads follow the 2-byte and 3-byte tables */
+
+ mf->hash[hash_2_value] = pos;
+ mf->hash[((1U << 10)) + hash_3_value] = pos;
+ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos;
+
+ mf->son[mf->cyclic_pos] = cur_match; /* store the previous 4-byte head for this slot */
+ move_pos(mf);
+ } while (--amount != 0);
+}
+/* Walk son[] (two slots per position) from cur_match, append each strictly longer match to matches[], relink the slots for pos; returns the end of matches. */
+static lzma_match *
+bt_find_func(
+ const uint32_t len_limit,
+ const uint32_t pos,
+ const uint8_t *const cur,
+ uint32_t cur_match,
+ uint32_t depth,
+ uint32_t *const son,
+ const uint32_t cyclic_pos,
+ const uint32_t cyclic_size,
+ lzma_match *matches,
+ uint32_t len_best)
+{
+ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; /* second slot of the current position */
+ uint32_t *ptr1 = son + (cyclic_pos << 1); /* first slot of the current position */
+
+ uint32_t len0 = 0;
+ uint32_t len1 = 0;
+
+ while (1) {
+ const uint32_t delta = pos - cur_match;
+ if (depth-- == 0 || delta >= cyclic_size) { /* depth budget used up or candidate outside cyclic_size: end both links */
+ *ptr0 = 0;
+ *ptr1 = 0;
+ return matches;
+ }
+
+ uint32_t *const pair = son + ((cyclic_pos - delta
+ + (delta > cyclic_pos ? cyclic_size : 0))
+ << 1); /* slot pair of the candidate, wrapping by cyclic_size */
+
+ const uint8_t *const pb = cur - delta;
+ uint32_t len = ((len0) < (len1) ? (len0) : (len1)); /* comparison starts at min(len0, len1) */
+
+ if (pb[len] == cur[len]) {
+ len = lzma_memcmplen(pb, cur, len + 1, len_limit);
+
+ if (len_best < len) { /* record only matches longer than any seen so far */
+ len_best = len;
+ matches->len = len;
+ matches->dist = delta - 1;
+ ++matches;
+
+ if (len == len_limit) { /* limit reached: take over the candidate's links and stop */
+ *ptr1 = pair[0];
+ *ptr0 = pair[1];
+ return matches;
+ }
+ }
+ }
+
+ if (pb[len] < cur[len]) { /* candidate's byte is smaller: link via ptr1, continue through pair[1] */
+ *ptr1 = cur_match;
+ ptr1 = pair + 1;
+ cur_match = *ptr1;
+ len1 = len;
+ } else { /* otherwise link via ptr0, continue through pair[0] */
+ *ptr0 = cur_match;
+ ptr0 = pair;
+ cur_match = *ptr0;
+ len0 = len;
+ }
+ }
+}
+
+/* Same walk over son[] as bt_find_func, but only relinks the slots for pos; no matches are recorded and nothing is returned. */
+static void
+bt_skip_func(
+ const uint32_t len_limit,
+ const uint32_t pos,
+ const uint8_t *const cur,
+ uint32_t cur_match,
+ uint32_t depth,
+ uint32_t *const son,
+ const uint32_t cyclic_pos,
+ const uint32_t cyclic_size)
+{
+ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; /* second slot of the current position */
+ uint32_t *ptr1 = son + (cyclic_pos << 1); /* first slot of the current position */
+
+ uint32_t len0 = 0;
+ uint32_t len1 = 0;
+
+ while (1) {
+ const uint32_t delta = pos - cur_match;
+ if (depth-- == 0 || delta >= cyclic_size) { /* depth budget used up or candidate outside cyclic_size: end both links */
+ *ptr0 = 0;
+ *ptr1 = 0;
+ return;
+ }
+
+ uint32_t *pair = son + ((cyclic_pos - delta
+ + (delta > cyclic_pos ? cyclic_size : 0))
+ << 1); /* slot pair of the candidate, wrapping by cyclic_size */
+ const uint8_t *pb = cur - delta;
+ uint32_t len = ((len0) < (len1) ? (len0) : (len1)); /* comparison starts at min(len0, len1) */
+
+ if (pb[len] == cur[len]) {
+ len = lzma_memcmplen(pb, cur, len + 1, len_limit);
+
+ if (len == len_limit) { /* limit reached: take over the candidate's links and stop */
+ *ptr1 = pair[0];
+ *ptr0 = pair[1];
+ return;
+ }
+ }
+
+ if (pb[len] < cur[len]) { /* candidate's byte is smaller: link via ptr1, continue through pair[1] */
+ *ptr1 = cur_match;
+ ptr1 = pair + 1;
+ cur_match = *ptr1;
+ len1 = len;
+ } else { /* otherwise link via ptr0, continue through pair[0] */
+ *ptr0 = cur_match;
+ ptr0 = pair;
+ cur_match = *ptr0;
+ len0 = len;
+ }
+ }
+}
+/* BT2 match finder: hash on the 16 bits at the current position, search via bt_find_func and return the number of matches written. */
+extern uint32_t
+lzma_mf_bt2_find(lzma_mf *mf, lzma_match *matches)
+{
+ uint32_t len_limit = mf_avail(mf);
+ if (mf->nice_len <= len_limit) {
+ len_limit = mf->nice_len; /* cap the match length at nice_len */
+ } else if (len_limit < (2) || (mf->action == LZMA_SYNC_FLUSH)) { /* reached only when mf_avail() < nice_len */
+ move_pending(mf);
+ return 0;
+ }
+ const uint8_t *cur = mf_ptr(mf);
+ const uint32_t pos = mf->read_pos + mf->offset;
+ uint32_t matches_count = 0;
+ const uint32_t hash_value = read16ne(cur); /* the hash is the value read16ne() returns for cur */
+ const uint32_t cur_match = mf->hash[hash_value];
+ mf->hash[hash_value] = pos;
+
+ matches_count = bt_find_func(len_limit, pos, cur, cur_match, mf->depth,
+ mf->son, mf->cyclic_pos, mf->cyclic_size,
+ matches + matches_count, 1) - matches; /* len_best starts at 1; end pointer minus base gives the count */
+ move_pos(mf);
+ return matches_count;
+}
+/* Advance the BT2 match finder by `amount` positions, updating the hash head and son[] via bt_skip_func. */
+extern void
+lzma_mf_bt2_skip(lzma_mf *mf, uint32_t amount)
+{
+ do {
+ uint32_t len_limit = mf_avail(mf);
+ if (mf->nice_len <= len_limit) {
+ len_limit = mf->nice_len; /* cap the match length at nice_len */
+ } else if (len_limit < (2) || (mf->action == LZMA_SYNC_FLUSH)) { /* reached only when mf_avail() < nice_len */
+ move_pending(mf);
+ continue; /* in a do-while this jumps to the --amount test */
+ }
+ const uint8_t *cur = mf_ptr(mf);
+ const uint32_t pos = mf->read_pos + mf->offset;
+
+ const uint32_t hash_value = read16ne(cur);
+ const uint32_t cur_match = mf->hash[hash_value]; /* previous head, read before it is overwritten */
+ mf->hash[hash_value] = pos;
+
+ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
+ mf->cyclic_pos, mf->cyclic_size);
+ move_pos(mf);
+ } while (--amount != 0);
+}
+/* BT3 match finder: try the 2-byte hash candidate first, then search son[] from the 3-byte hash head; returns the number of matches written. */
+extern uint32_t
+lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches)
+{
+ uint32_t len_limit = mf_avail(mf);
+ if (mf->nice_len <= len_limit) {
+ len_limit = mf->nice_len; /* cap the match length at nice_len */
+ } else if (len_limit < (3) || (1 && mf->action == LZMA_SYNC_FLUSH)) { /* the "1 &&" does not change the condition */
+ move_pending(mf);
+ return 0;
+ }
+ const uint8_t *cur = mf_ptr(mf);
+ const uint32_t pos = mf->read_pos + mf->offset;
+ uint32_t matches_count = 0;
+
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
+ const uint32_t hash_2_value = temp & ((1U << 10) - 1); /* 10-bit hash of cur[0..1] */
+ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask; /* hash of cur[0..2] */
+
+ const uint32_t delta2 = pos - mf->hash[hash_2_value]; /* distance back to the position last stored under the 2-byte hash */
+ const uint32_t cur_match = mf->hash[((1U << 10)) + hash_value];
+
+ mf->hash[hash_2_value] = pos;
+ mf->hash[((1U << 10)) + hash_value] = pos;
+
+ uint32_t len_best = 2;
+
+ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { /* candidate must lie within cyclic_size and share the first byte */
+ len_best = lzma_memcmplen(
+ cur, cur - delta2, len_best, len_limit);
+
+ matches[0].len = len_best;
+ matches[0].dist = delta2 - 1;
+ matches_count = 1;
+
+ if (len_best == len_limit) { /* already at the limit: only update son[] and report this one match */
+ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
+ mf->cyclic_pos, mf->cyclic_size);
+ move_pos(mf);
+ return 1;
+ }
+ }
+
+ matches_count = bt_find_func(len_limit, pos, cur, cur_match, mf->depth,
+ mf->son, mf->cyclic_pos, mf->cyclic_size,
+ matches + matches_count, len_best) - matches; /* end pointer minus base gives the entry count */
+ move_pos(mf);
+ return matches_count;
+}
+
+/* Advance the BT3 match finder by `amount` positions, updating both hash heads and son[] via bt_skip_func. */
+extern void
+lzma_mf_bt3_skip(lzma_mf *mf, uint32_t amount)
+{
+ do {
+ uint32_t len_limit = mf_avail(mf);
+ if (mf->nice_len <= len_limit) {
+ len_limit = mf->nice_len; }
+ else if (len_limit < (3) || (1 && mf->action == LZMA_SYNC_FLUSH)) { /* the "1 &&" does not change the condition */
+ move_pending(mf);
+ continue; /* in a do-while this jumps to the --amount test */
+ }
+ const uint8_t *cur = mf_ptr(mf);
+ const uint32_t pos = mf->read_pos + mf->offset;
+
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
+ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
+ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask;
+
+ const uint32_t cur_match = mf->hash[((1U << 10)) + hash_value]; /* previous 3-byte head, read before it is overwritten */
+
+ mf->hash[hash_2_value] = pos;
+ mf->hash[((1U << 10)) + hash_value] = pos;
+
+ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
+ mf->cyclic_pos, mf->cyclic_size);
+ move_pos(mf);
+ } while (--amount != 0);
+}
+/* BT4 match finder: try the 2-byte and 3-byte hash candidates, then search son[] from the 4-byte hash head; returns the number of matches written. */
+extern uint32_t
+lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches)
+{
+ uint32_t len_limit = mf->write_pos - mf->read_pos; /* computed inline here; the sibling finders call mf_avail() at this point */
+ if (mf->nice_len <= len_limit) {
+ len_limit = mf->nice_len;
+ } else if (len_limit < (4) || (mf->action == LZMA_SYNC_FLUSH)) {
+ ++mf->read_pos; /* inline here; the sibling finders call move_pending() at this point */
+ ++mf->pending;
+ return 0;
+ }
+
+ const uint8_t *cur = mf->buffer + mf->read_pos;
+ const uint32_t pos = mf->read_pos + mf->offset;
+ uint32_t matches_count = 0;
+
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
+ const uint32_t hash_2_value = temp & ((1U << 10) - 1); /* 10-bit hash of cur[0..1] */
+ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) & ((1U << 16) - 1); /* 16-bit hash of cur[0..2] */
+ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)
+ ^ (lzma_crc32_table[0][cur[3]] << 5))
+ & mf->hash_mask; /* hash of cur[0..3] */
+
+ uint32_t delta2 = pos - mf->hash[hash_2_value];
+ const uint32_t delta3 = pos - mf->hash[((1U << 10)) + hash_3_value];
+ const uint32_t cur_match = mf->hash[((1U << 10) + (1U << 16)) + hash_value];
+
+ mf->hash[hash_2_value] = pos;
+ mf->hash[((1U << 10)) + hash_3_value] = pos;
+ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos;
+
+ uint32_t len_best = 1;
+
+ if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { /* candidate must lie within cyclic_size and share the first byte */
+ len_best = 2;
+ matches[0].len = 2;
+ matches[0].dist = delta2 - 1;
+ matches_count = 1;
+ }
+
+ if (delta2 != delta3 && delta3 < mf->cyclic_size && *(cur - delta3) == *cur) {
+ len_best = 3;
+ matches[matches_count++].dist = delta3 - 1; /* only .dist is set here; .len is filled in below */
+ delta2 = delta3; /* the extension below uses the delta3 candidate */
+ }
+
+ if (matches_count != 0) {
+ len_best = lzma_memcmplen(cur, cur - delta2, len_best, len_limit);
+
+ matches[matches_count - 1].len = len_best;
+
+ if (len_best == len_limit) { /* already at the limit: only update son[] and return what was found */
+ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
+ mf->cyclic_pos, mf->cyclic_size);
+ move_pos(mf);
+ return matches_count;
+ }
+ }
+
+ if (len_best < 3)
+ len_best = 3; /* the search below is started with len_best of at least 3 */
+
+ matches_count = bt_find_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
+ mf->cyclic_pos, mf->cyclic_size,
+ matches + matches_count, len_best) - matches; /* end pointer minus base gives the entry count */
+ move_pos(mf);
+ return matches_count;
+}
+/* Advance the BT4 match finder by `amount` positions, updating all three hash heads and son[] via bt_skip_func. */
+extern void
+lzma_mf_bt4_skip(lzma_mf *mf, uint32_t amount)
+{
+ do {
+ uint32_t len_limit = mf_avail(mf);
+ if (mf->nice_len <= len_limit) {
+ len_limit = mf->nice_len; /* cap the match length at nice_len */
+ } else if (len_limit < (4) || (mf->action == LZMA_SYNC_FLUSH)) { /* reached only when mf_avail() < nice_len */
+ move_pending(mf);
+ continue; /* in a do-while this jumps to the --amount test */
+ }
+
+ const uint8_t *cur = mf->buffer + mf->read_pos;
+ const uint32_t pos = mf->read_pos + mf->offset;
+
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1];
+ const uint32_t hash_2_value = temp & ((1U << 10) - 1);
+ const uint32_t hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8))
+ & ((1U << 16) - 1);
+ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)
+ ^ (lzma_crc32_table[0][cur[3]] << 5))
+ & mf->hash_mask;
+
+ const uint32_t cur_match = mf->hash[((1U << 10) + (1U << 16)) + hash_value]; /* previous 4-byte head, read before it is overwritten */
+
+ mf->hash[hash_2_value] = pos;
+ mf->hash[((1U << 10)) + hash_3_value] = pos;
+ mf->hash[((1U << 10) + (1U << 16)) + hash_value] = pos;
+
+ bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, mf->son,
+ mf->cyclic_pos, mf->cyclic_size);
+ move_pos(mf);
+ } while (--amount != 0);
+}
+/* Skip `amount` positions through the mf->skip callback and add `amount` to mf->read_ahead; does nothing when amount is 0. */
+static inline void
+mf_skip(lzma_mf *mf, uint32_t amount)
+{
+ if (amount != 0) { /* the skip functions above loop with do-while, so 0 must not reach them */
+ mf->skip(mf, amount);
+ mf->read_ahead += amount;
+ }
+}
+/* Typedef name for the encoder struct defined further down, and the 16-bit type used for probability variables. */
+typedef struct lzma_lzma1_encoder_s lzma_lzma1_encoder;
+typedef uint16_t probability;
+/* Probabilities and cached prices used to encode match lengths; the first index of low/mid/prices/counters is pos_state. */
+typedef struct {
+ probability choice; /* first selector bit written by length() */
+ probability choice2; /* second selector bit written by length() */
+ probability low[(1 << 4)][(1 << 3)]; /* 3-bit tree used when len - 2 < 8 */
+ probability mid[(1 << 4)][(1 << 3)]; /* 3-bit tree used when 8 <= len - 2 < 16 */
+ probability high[(1 << 8)]; /* 8-bit tree for the remaining lengths */
+ uint32_t prices[(1 << 4)][((1 << 3) + (1 << 3) + (1 << 8))]; /* filled by length_update_prices() */
+ uint32_t table_size; /* number of price entries computed per pos_state */
+ uint32_t counters[(1 << 4)]; /* decremented by length(); prices are refreshed when one reaches 0 */
+} lzma_length_encoder;
+/* Range encoder state plus a queue of up to 58 pending symbols that rc_encode() turns into output bytes. */
+typedef struct {
+ uint64_t low;
+ uint64_t cache_size;
+ uint32_t range;
+ uint8_t cache;
+ size_t count; /* number of symbols queued in symbols[]/probs[] */
+ size_t pos; /* index of the next queued symbol rc_encode() will process */
+
+ enum {
+ RC_BIT_0, /* value 0: rc_bit() stores a 0 bit as this enumerator */
+ RC_BIT_1, /* value 1: rc_bit() stores a 1 bit as this enumerator */
+ RC_DIRECT_0,
+ RC_DIRECT_1,
+ RC_FLUSH,
+ } symbols[58];
+
+ probability *probs[58]; /* probability for each queued RC_BIT_* symbol; rc_direct()/rc_flush() leave it unset */
+} lzma_range_encoder;
+
+/* Encoder state; the encoding code tests "state < 7", which selects the seven enumerators listed before STATE_LIT_MATCH. */
+typedef enum {
+ STATE_LIT_LIT,
+ STATE_MATCH_LIT_LIT,
+ STATE_REP_LIT_LIT,
+ STATE_SHORTREP_LIT_LIT,
+ STATE_MATCH_LIT,
+ STATE_REP_LIT,
+ STATE_SHORTREP_LIT, /* value 6: last state for which "state < 7" holds */
+ STATE_LIT_MATCH, /* value 7 */
+ STATE_LIT_LONGREP,
+ STATE_LIT_SHORTREP,
+ STATE_NONLIT_MATCH,
+ STATE_NONLIT_REP, /* value 11; the per-state probability arrays have 12 entries */
+} lzma_lzma_state;
+/* Element type of the opts[] array in struct lzma_lzma1_encoder_s; none of the functions in this part of the file access its fields. */
+typedef struct {
+ lzma_lzma_state state;
+ _Bool prev_1_is_literal;
+ _Bool prev_2;
+
+ uint32_t pos_prev_2;
+ uint32_t back_prev_2;
+
+ uint32_t price;
+ uint32_t pos_prev;
+ uint32_t back_prev;
+
+ uint32_t backs[4];
+} lzma_optimal;
+/* LZMA1 encoder state: range coder, current state and rep distances, match buffer, probability models and price tables. */
+struct lzma_lzma1_encoder_s {
+ lzma_range_encoder rc;
+ lzma_lzma_state state;
+ uint32_t reps[4]; /* the four most recent match distances; reps[0] is the newest */
+ lzma_match matches[(2 + ((1 << 3) + (1 << 3) + (1 << 8)) - 1) + 1]; /* 274 entries */
+ uint32_t matches_count; /* saved by lzma_lzma_optimum_fast() together with longest_match_length */
+ uint32_t longest_match_length;
+ _Bool fast_mode; /* selects lzma_lzma_optimum_fast() in lzma_lzma_encode() */
+ _Bool is_initialized; /* set by encode_init() */
+ _Bool is_flushed;
+ uint32_t pos_mask; /* position & pos_mask gives pos_state */
+ uint32_t literal_context_bits;
+ uint32_t literal_pos_mask;
+
+ probability literal[(1 << 4)][0x300];
+ probability is_match[12][(1 << 4)]; /* indexed [state][pos_state] */
+ probability is_rep[12]; /* the is_rep* arrays are indexed by state */
+ probability is_rep0[12];
+ probability is_rep1[12];
+ probability is_rep2[12];
+ probability is_rep0_long[12][(1 << 4)];
+ probability dist_slot[4][(1 << 6)]; /* indexed [dist_state] in match() */
+ probability dist_special[(1 << (14 / 2)) - 14];
+ probability dist_align[(1 << 4)];
+
+ lzma_length_encoder match_len_encoder;
+ lzma_length_encoder rep_len_encoder;
+
+ uint32_t dist_slot_prices[4][(1 << 6)];
+ uint32_t dist_prices[4][(1 << (14 / 2))];
+ uint32_t dist_table_size;
+ uint32_t match_price_count; /* incremented by match() */
+
+ uint32_t align_prices[(1 << 4)];
+ uint32_t align_price_count; /* incremented by match() when dist_slot >= 14 */
+ uint32_t opts_end_index;
+ uint32_t opts_current_index;
+ lzma_optimal opts[(1 << 12)];
+};
+/* Fast-mode choice of the next symbol: *back_res gets 4294967295U (literal), a rep index 0..3, or match distance + 4; *len_res gets its length. */
+extern void
+lzma_lzma_optimum_fast(lzma_lzma1_encoder *restrict coder,
+ lzma_mf *restrict mf,
+ uint32_t *restrict back_res, uint32_t *restrict len_res)
+{
+ const uint32_t nice_len = mf->nice_len;
+
+ uint32_t len_main;
+ uint32_t matches_count;
+ if (mf->read_ahead == 0) { /* nothing read ahead: run the match finder now */
+ len_main = lzma_mf_find(mf, &matches_count, coder->matches);
+ } else { /* reuse the result saved in coder by the look-ahead further down */
+ len_main = coder->longest_match_length;
+ matches_count = coder->matches_count;
+ }
+
+ const uint8_t *buf = mf_ptr(mf) - 1; /* one byte before the pointer mf_ptr() returns */
+ const uint32_t buf_avail
+ = ((mf_avail(mf) + 1) < ((2 + ((1 << 3) + (1 << 3) + (1 << 8)) - 1))
+ ? (mf_avail(mf) + 1) : ((2 + ((1 << 3) + (1 << 3) + (1 << 8)) - 1))); /* min(mf_avail + 1, 273) */
+
+ if (buf_avail < 2) { /* not enough bytes for any match: emit a literal */
+ *back_res = (4294967295U);
+ *len_res = 1;
+ return;
+ }
+
+ uint32_t rep_len = 0;
+ uint32_t rep_index = 0;
+
+ for (uint32_t i = 0; i < 4; ++i) { /* measure the match at each of the four rep distances */
+ const uint8_t *const buf_back = buf - coder->reps[i] - 1;
+ if ((read16ne(buf) != read16ne(buf_back)))
+ continue;
+ const uint32_t len = lzma_memcmplen(buf, buf_back, 2, buf_avail);
+ if (len >= nice_len) { /* long enough: take this rep match immediately */
+ *back_res = i;
+ *len_res = len;
+ mf_skip(mf, len - 1);
+ return;
+ }
+ if (len > rep_len) { /* remember the longest rep match; ties keep the lower index */
+ rep_index = i;
+ rep_len = len;
+ }
+ }
+ if (len_main >= nice_len) { /* main match is long enough: take it immediately */
+ *back_res = coder->matches[matches_count - 1].dist + 4; /* + 4 keeps distances apart from rep indexes 0..3 */
+ *len_res = len_main;
+ mf_skip(mf, len_main - 1);
+ return;
+ }
+
+ uint32_t back_main = 0;
+ if (len_main >= 2) {
+ back_main = coder->matches[matches_count - 1].dist;
+ while (matches_count > 1 && len_main ==
+ coder->matches[matches_count - 2].len + 1) { /* previous entry is exactly one byte shorter */
+ if (!(((back_main) >> 7) > (coder->matches[ matches_count - 2].dist)))
+ break;
+ --matches_count; /* its distance is below back_main >> 7: switch to the shorter match */
+ len_main = coder->matches[matches_count - 1].len;
+ back_main = coder->matches[matches_count - 1].dist;
+ }
+ if (len_main == 2 && back_main >= 0x80) /* discard a 2-byte match at distance 0x80 or more */
+ len_main = 1;
+ }
+
+ if (rep_len >= 2) { /* take the rep match if it is at most 1, 2 or 3 bytes shorter, depending on back_main */
+ if (rep_len + 1 >= len_main
+ || (rep_len + 2 >= len_main
+ && back_main > (1U << 9))
+ || (rep_len + 3 >= len_main
+ && back_main > (1U << 15))) {
+ *back_res = rep_index;
+ *len_res = rep_len;
+ mf_skip(mf, rep_len - 1);
+ return;
+ }
+ }
+
+ if (len_main < 2 || buf_avail <= 2) { /* no usable match: emit a literal */
+ *back_res = (4294967295U);
+ *len_res = 1;
+ return;
+ }
+
+ coder->longest_match_length = lzma_mf_find(mf,
+ &coder->matches_count, coder->matches); /* look-ahead result, saved in coder for the next call */
+
+ if (coder->longest_match_length >= 2) {
+ const uint32_t new_dist = coder->matches[
+ coder->matches_count - 1].dist;
+
+ if ((coder->longest_match_length >= len_main
+ && new_dist < back_main)
+ || (coder->longest_match_length == len_main + 1
+ && !(((new_dist) >> 7) > (back_main)))
+ || (coder->longest_match_length > len_main + 1)
+ || (coder->longest_match_length + 1 >= len_main
+ && len_main >= 3
+ && (((back_main) >> 7) > (new_dist)))) { /* the look-ahead match is preferred: emit a literal now */
+ *back_res = (4294967295U);
+ *len_res = 1;
+ return;
+ }
+ }
+ ++buf;
+ const uint32_t limit = ((2) > (len_main - 1) ? (2) : (len_main - 1)); /* max(2, len_main - 1) */
+ for (uint32_t i = 0; i < 4; ++i) { /* a rep distance matching `limit` bytes at the new buf also yields a literal */
+ if (memcmp(buf, buf - coder->reps[i] - 1, limit) == 0) {
+ *back_res = (4294967295U);
+ *len_res = 1;
+ return;
+ }
+ }
+
+ *back_res = back_main + 4;
+ *len_res = len_main;
+ mf_skip(mf, len_main - 2); /* NOTE(review): - 2, not - 1; presumably the second lzma_mf_find() call already advanced one position - confirm */
+ return;
+}
+/* Queue one bit and the probability it is coded with; nothing is encoded until rc_encode() runs. */
+static inline void
+rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
+{
+ rc->symbols[rc->count] = bit; /* 0 and 1 are the values of RC_BIT_0 and RC_BIT_1 */
+ rc->probs[rc->count] = prob;
+ ++rc->count; /* no bounds check against the 58-entry queue here */
+}
+/* Queue the low `bit_count` bits of `symbol`, most significant first, each with the probability at the current tree index. */
+static inline void
+rc_bittree(lzma_range_encoder *rc, probability *probs,
+ uint32_t bit_count, uint32_t symbol)
+{
+ uint32_t model_index = 1; /* tree index starts at 1 */
+
+ do {
+ const uint32_t bit = (symbol >> --bit_count) & 1;
+ rc_bit(rc, &probs[model_index], bit);
+ model_index = (model_index << 1) + bit; /* descend to the child selected by the bit */
+ } while (bit_count != 0);
+}
+/* First-call setup: queue the first input byte as a literal if input is available; returns 0 only when there is no input and the action is LZMA_RUN. */
+static _Bool
+encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf)
+{
+ if (mf->read_pos == mf->read_limit) {
+ if (mf->action == LZMA_RUN)
+ return 0; /* is_initialized stays unset, so the caller tries again later */
+ } else {
+ mf_skip(mf, 1);
+ mf->read_ahead = 0; /* undo the read_ahead increment made by mf_skip() */
+ rc_bit(&coder->rc, &coder->is_match[0][0], 0); /* is_match bit 0, as encode_symbol() queues for a literal */
+ rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); /* the byte at mf->buffer[0] as 8 tree-coded bits */
+ }
+
+ coder->is_initialized = 1;
+
+ return 1;
+}
+/* Return mf->read_pos minus mf->read_ahead. */
+static inline uint32_t
+mf_position(const lzma_mf *mf)
+{
+ return mf->read_pos - mf->read_ahead;
+}
+/* Shift 8 bits out of rc->low, writing cached bytes to out[] when possible; returns 1 if out[] is full, 0 otherwise. */
+static inline _Bool
+rc_shift_low(lzma_range_encoder *rc,
+ uint8_t *out, size_t *out_pos, size_t out_size)
+{
+ if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000)
+ || (uint32_t)(rc->low >> 32) != 0) { /* low 32 bits below 0xFF000000, or bits 32 and up are set */
+ do {
+ if (*out_pos == out_size)
+ return 1; /* no room left; rc state stays as it is */
+
+ out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32); /* cached byte plus the bits of low above bit 31 */
+ ++*out_pos;
+ rc->cache = 0xFF;
+ } while (--rc->cache_size != 0);
+ rc->cache = (rc->low >> 24) & 0xFF; /* bits 24..31 of low become the new cached byte */
+ }
+
+ ++rc->cache_size;
+ rc->low = (rc->low & 0x00FFFFFF) << 8; /* keep the low 24 bits and shift them up by one byte */
+ return 0;
+}
+/* Put the range encoder into its starting state and empty the symbol queue. */
+static inline void
+rc_reset(lzma_range_encoder *rc)
+{
+ rc->low = 0;
+ rc->cache_size = 1;
+ rc->range = (4294967295U); /* all 32 bits set */
+ rc->cache = 0;
+ rc->count = 0; /* no queued symbols */
+ rc->pos = 0;
+}
+/* Encode the queued symbols into out[]; returns 1 if out[] filled up (queue position is kept), 0 once the queue has been processed. */
+static inline _Bool
+rc_encode(lzma_range_encoder *rc,
+ uint8_t *out, size_t *out_pos, size_t out_size)
+{
+ while (rc->pos < rc->count) {
+ if (rc->range < (1U << 24)) { /* range fell below 2^24: shift out a byte and scale range by 256 */
+ if (rc_shift_low(rc, out, out_pos, out_size))
+ return 1;
+ rc->range <<= 8;
+ }
+
+ switch (rc->symbols[rc->pos]) {
+ case RC_BIT_0: {
+ probability prob = *rc->probs[rc->pos];
+ rc->range = (rc->range >> 11)
+ * prob;
+ prob += ((1U << 11) - prob) >> 5; /* move prob towards 1 << 11 */
+ *rc->probs[rc->pos] = prob;
+ break;
+ }
+
+ case RC_BIT_1: {
+ probability prob = *rc->probs[rc->pos];
+ const uint32_t bound = prob * (rc->range
+ >> 11);
+ rc->low += bound;
+ rc->range -= bound;
+ prob -= prob >> 5; /* move prob towards 0 */
+ *rc->probs[rc->pos] = prob;
+ break;
+ }
+
+ case RC_DIRECT_0: /* direct bits halve the range and use no probability */
+ rc->range >>= 1;
+ break;
+
+ case RC_DIRECT_1:
+ rc->range >>= 1;
+ rc->low += rc->range;
+ break;
+
+ case RC_FLUSH:
+ rc->range = (4294967295U);
+ do { /* one rc_shift_low() call for each symbol left in the queue */
+ if (rc_shift_low(rc, out, out_pos, out_size))
+ return 1;
+ } while (++rc->pos < rc->count);
+
+ rc_reset(rc);
+ return 0;
+
+ default:
+ break;
+ }
+ ++rc->pos;
+ }
+
+ rc->count = 0; /* every queued symbol was consumed: empty the queue */
+ rc->pos = 0;
+ return 0;
+}
+/* Return rc->cache_size + 4; lzma_lzma_encode() adds this to *out_pos in its output-size check. */
+static inline uint64_t
+rc_pending(const lzma_range_encoder *rc)
+{
+ return rc->cache_size + 5 - 1;
+}
+/* Queue the 8 bits of `symbol`, most significant first, choosing each probability from the tree position and the corresponding bit of match_byte. */
+static inline void
+literal_matched(lzma_range_encoder *rc, probability *subcoder,
+ uint32_t match_byte, uint32_t symbol)
+{
+ uint32_t offset = 0x100;
+ symbol += 1U << 8; /* marker bit above the 8 data bits; ends the loop after 8 shifts */
+
+ do {
+ match_byte <<= 1;
+ const uint32_t match_bit = match_byte & offset; /* next bit of match_byte while offset is still 0x100 */
+ const uint32_t subcoder_index
+ = offset + match_bit + (symbol >> 8);
+ const uint32_t bit = (symbol >> 7) & 1;
+ rc_bit(rc, &subcoder[subcoder_index], bit);
+
+ symbol <<= 1;
+ offset &= ~(match_byte ^ symbol); /* offset is cleared once a match_byte bit and a symbol bit differ */
+
+ } while (symbol < (1U << 16));
+}
+/* Queue the byte at mf->read_pos - mf->read_ahead as a literal and move coder->state to the matching literal state. */
+static inline void
+literal(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t position)
+{
+ const uint8_t cur_byte = mf->buffer[mf->read_pos - mf->read_ahead];
+ probability *subcoder = ((coder->literal)[
+ (((position) & (coder->literal_pos_mask))
+ << (coder->literal_context_bits))
+ + ((uint32_t)(mf->buffer[mf->read_pos - mf->read_ahead - 1])
+ >> (8U - (coder->literal_context_bits)))]); /* model row chosen from position bits and the top bits of the previous byte */
+
+ if (((coder->state) < 7)) { /* state value below 7: plain 8-bit tree coding */
+ rc_bittree(&coder->rc, subcoder, 8, cur_byte);
+ } else { /* otherwise code against the byte found at distance reps[0] + 1 */
+ const uint8_t match_byte
+ = mf->buffer[mf->read_pos - coder->reps[0] - 1 - mf->read_ahead];
+ literal_matched(&coder->rc, subcoder, match_byte, cur_byte);
+ }
+ coder->state
+ = ((coder->state) <= STATE_SHORTREP_LIT_LIT
+ ? STATE_LIT_LIT : ((coder->state) <= STATE_LIT_SHORTREP
+ ? (coder->state) - 3 : (coder->state) - 6)); /* states 0..3 -> 0, 4..9 -> state - 3, 10..11 -> state - 6 */
+}
+/* Price table with 128 entries; the rc_bit*_price() helpers index it with an 11-bit probability value shifted right by 4. */
+const uint8_t lzma_rc_prices[] = {
+ 128, 103, 91, 84, 78, 73, 69, 66,
+ 63, 61, 58, 56, 54, 52, 51, 49,
+ 48, 46, 45, 44, 43, 42, 41, 40,
+ 39, 38, 37, 36, 35, 34, 34, 33,
+ 32, 31, 31, 30, 29, 29, 28, 28,
+ 27, 26, 26, 25, 25, 24, 24, 23,
+ 23, 22, 22, 22, 21, 21, 20, 20,
+ 19, 19, 19, 18, 18, 17, 17, 17,
+ 16, 16, 16, 15, 15, 15, 14, 14,
+ 14, 13, 13, 13, 12, 12, 12, 11,
+ 11, 11, 11, 10, 10, 10, 10, 9,
+ 9, 9, 9, 8, 8, 8, 8, 7,
+ 7, 7, 7, 6, 6, 6, 6, 5,
+ 5, 5, 5, 5, 4, 4, 4, 4,
+ 3, 3, 3, 3, 3, 2, 2, 2,
+ 2, 2, 2, 1, 1, 1, 1, 1
+};
+/* Price of coding `bit` (0 or 1) with probability `prob`, looked up in lzma_rc_prices. */
+static inline uint32_t
+rc_bit_price(const probability prob, const uint32_t bit)
+{
+ return lzma_rc_prices[(prob ^ ((0U - bit)
+ & ((1U << 11) - 1))) >> 4]; /* bit 1 flips the low 11 bits of prob, bit 0 leaves prob unchanged */
+}
+/* Price of coding a 0 bit with probability `prob`; same lookup rc_bit_price() performs for bit 0. */
+static inline uint32_t
+rc_bit_0_price(const probability prob)
+{
+ return lzma_rc_prices[prob >> 4];
+}
+/* Price of coding a 1 bit with probability `prob`; same lookup rc_bit_price() performs for bit 1. */
+static inline uint32_t
+rc_bit_1_price(const probability prob)
+{
+ return lzma_rc_prices[(prob ^ ((1U << 11) - 1))
+ >> 4]; /* flip the low 11 bits of prob before indexing */
+}
+/* Sum of the bit prices for coding `symbol` with a `bit_levels`-deep probability tree, walked from the leaf up to index 1. */
+static inline uint32_t
+rc_bittree_price(const probability *const probs,
+ const uint32_t bit_levels, uint32_t symbol)
+{
+ uint32_t price = 0;
+ symbol += 1U << bit_levels; /* marker bit above the data bits; the loop stops when only it is left */
+
+ do {
+ const uint32_t bit = symbol & 1;
+ symbol >>= 1; /* after the shift, symbol is the tree index whose probability coded `bit` */
+ price += rc_bit_price(probs[symbol], bit);
+ } while (symbol != 1);
+
+ return price;
+}
+/* Recompute lc->prices[pos_state][0 .. table_size - 1] from the current probabilities and reset lc->counters[pos_state] to table_size. */
+static void
+length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state)
+{
+ const uint32_t table_size = lc->table_size;
+ lc->counters[pos_state] = table_size;
+
+ const uint32_t a0 = rc_bit_0_price(lc->choice); /* choice = 0: low tree */
+ const uint32_t a1 = rc_bit_1_price(lc->choice);
+ const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2); /* choice = 1, choice2 = 0: mid tree */
+ const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2); /* choice = 1, choice2 = 1: high tree */
+ uint32_t *const prices = lc->prices[pos_state];
+
+ uint32_t i;
+ for (i = 0; i < table_size && i < (1 << 3); ++i) /* entries 0..7 */
+ prices[i] = a0 + rc_bittree_price(lc->low[pos_state],
+ 3, i);
+
+ for (; i < table_size && i < (1 << 3) + (1 << 3); ++i) /* entries 8..15 */
+ prices[i] = b0 + rc_bittree_price(lc->mid[pos_state],
+ 3, i - (1 << 3));
+
+ for (; i < table_size; ++i) /* entries 16 and up */
+ prices[i] = b1 + rc_bittree_price(lc->high, 8,
+ i - (1 << 3) - (1 << 3));
+
+ return;
+}
+/* Queue the encoding of match length `len` (the code works on len - 2); unless fast_mode, refresh the price table when its counter reaches 0. */
+static inline void
+length(lzma_range_encoder *rc, lzma_length_encoder *lc,
+ const uint32_t pos_state, uint32_t len, const _Bool fast_mode)
+{
+ len -= 2;
+
+ if (len < (1 << 3)) { /* 0..7: choice bit 0, then the 3-bit low tree */
+ rc_bit(rc, &lc->choice, 0);
+ rc_bittree(rc, lc->low[pos_state], 3, len);
+ } else {
+ rc_bit(rc, &lc->choice, 1);
+ len -= (1 << 3);
+
+ if (len < (1 << 3)) { /* 8..15: choice2 bit 0, then the 3-bit mid tree */
+ rc_bit(rc, &lc->choice2, 0);
+ rc_bittree(rc, lc->mid[pos_state], 3, len);
+ } else { /* 16 and up: choice2 bit 1, then the 8-bit high tree */
+ rc_bit(rc, &lc->choice2, 1);
+ len -= (1 << 3);
+ rc_bittree(rc, lc->high, 8, len);
+ }
+ }
+
+ if (!fast_mode)
+ if (--lc->counters[pos_state] == 0)
+ length_update_prices(lc, pos_state);
+}
+/* Queue a repeated-distance match using reps[rep], move that distance to reps[0], and update coder->state; len == 1 sends no length. */
+static inline void
+rep_match(lzma_lzma1_encoder *coder, const uint32_t pos_state,
+ const uint32_t rep, const uint32_t len)
+{
+ if (rep == 0) {
+ rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0);
+ rc_bit(&coder->rc,
+ &coder->is_rep0_long[coder->state][pos_state],
+ len != 1); /* bit is 0 for the length-1 case and 1 otherwise */
+ } else {
+ const uint32_t distance = coder->reps[rep];
+ rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1);
+
+ if (rep == 1) {
+ rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0);
+ } else {
+ rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1);
+ rc_bit(&coder->rc, &coder->is_rep2[coder->state],
+ rep - 2); /* 0 for rep 2, 1 for rep 3 */
+
+ if (rep == 3)
+ coder->reps[3] = coder->reps[2];
+
+ coder->reps[2] = coder->reps[1];
+ }
+
+ coder->reps[1] = coder->reps[0]; /* shift the newer distances down, then put the used one first */
+ coder->reps[0] = distance;
+ }
+
+ if (len == 1) {
+ coder->state = ((coder->state) < 7 ? STATE_LIT_SHORTREP : STATE_NONLIT_REP);
+ } else {
+ length(&coder->rc, &coder->rep_len_encoder, pos_state, len,
+ coder->fast_mode);
+ coder->state = ((coder->state) < 7 ? STATE_LIT_LONGREP : STATE_NONLIT_REP);
+ }
+}
+
+// In the original code this array has a constant initializer. It is quite
+// big, so the initializer is omitted here and the array is left without one.
+const uint8_t lzma_fastpos[1 << 13];
+/* Map a distance to its slot value through lzma_fastpos, scaling distances of 1 << 13 and above down before the lookup. */
+static inline uint32_t
+get_dist_slot(uint32_t dist)
+{
+ if (dist < (1U << (13 + ((0) + (0) * (13 - 1))))) /* dist < 1 << 13: direct lookup */
+ return lzma_fastpos[dist];
+
+ if (dist < (1U << (13 + ((0) + (1) * (13 - 1))))) /* dist < 1 << 25: look up dist >> 12 and add 24 */
+ return (uint32_t)(lzma_fastpos[(dist) >> ((0) + (1) * (13 - 1))]) + 2 * ((0) + (1) * (13 - 1));
+
+ return (uint32_t)(lzma_fastpos[(dist) >> ((0) + (2) * (13 - 1))]) + 2 * ((0) + (2) * (13 - 1)); /* otherwise look up dist >> 24 and add 48 */
+}
+/* Queue the low `bit_count` bits of `symbol`, least significant first, each with the probability at the current tree index. */
+static inline void
+rc_bittree_reverse(lzma_range_encoder *rc, probability *probs,
+ uint32_t bit_count, uint32_t symbol)
+{
+ uint32_t model_index = 1; /* tree index starts at 1 */
+ do {
+ const uint32_t bit = symbol & 1;
+ symbol >>= 1;
+ rc_bit(rc, &probs[model_index], bit);
+ model_index = (model_index << 1) + bit; /* descend to the child selected by the bit */
+ } while (--bit_count != 0);
+}
+/* Queue the low `bit_count` bits of `value`, most significant first, as RC_DIRECT_0 / RC_DIRECT_1 symbols (no probability is stored). */
+static inline void
+rc_direct(lzma_range_encoder *rc, uint32_t value, uint32_t bit_count)
+{
+ do {
+ rc->symbols[rc->count++]
+ = RC_DIRECT_0 + ((value >> --bit_count) & 1); /* RC_DIRECT_1 directly follows RC_DIRECT_0 in the enum */
+ } while (bit_count != 0);
+}
+/* Queue a normal match (length, distance slot, extra distance bits), push `distance` onto the front of reps[] and update the state. */
+static inline void
+match(lzma_lzma1_encoder *coder, const uint32_t pos_state,
+ const uint32_t distance, const uint32_t len)
+{
+ coder->state = ((coder->state) < 7 ? STATE_LIT_MATCH : STATE_NONLIT_MATCH);
+
+ length(&coder->rc, &coder->match_len_encoder, pos_state, len,
+ coder->fast_mode);
+
+ const uint32_t dist_slot = get_dist_slot(distance);
+ const uint32_t dist_state = ((len) < 4 + 2 ? (len) - 2 : 4 - 1); /* len - 2 for len < 6, otherwise 3 */
+ rc_bittree(&coder->rc, coder->dist_slot[dist_state], 6, dist_slot);
+
+ if (dist_slot >= 4) { /* slots below 4 carry no extra distance bits */
+ const uint32_t footer_bits = (dist_slot >> 1) - 1;
+ const uint32_t base = (2 | (dist_slot & 1)) << footer_bits;
+ const uint32_t dist_reduced = distance - base;
+
+ if (dist_slot < 14) { /* all footer bits go through the dist_special models */
+ rc_bittree_reverse(&coder->rc, coder->dist_special + base - dist_slot - 1,
+ footer_bits, dist_reduced);
+ } else { /* upper bits as direct bits, lowest 4 bits through the dist_align models */
+ rc_direct(&coder->rc, dist_reduced >> 4,
+ footer_bits - 4);
+ rc_bittree_reverse(
+ &coder->rc, coder->dist_align,
+ 4, dist_reduced & ((1 << 4) - 1));
+ ++coder->align_price_count;
+ }
+ }
+
+ coder->reps[3] = coder->reps[2]; /* shift the distance history; the oldest entry is dropped */
+ coder->reps[2] = coder->reps[1];
+ coder->reps[1] = coder->reps[0];
+ coder->reps[0] = distance;
+ ++coder->match_price_count;
+}
+/* Queue one symbol: a literal when back is 4294967295U, a rep match when back < 4, otherwise a match with distance back - 4. */
+static void
+encode_symbol(lzma_lzma1_encoder *coder, lzma_mf *mf,
+ uint32_t back, uint32_t len, uint32_t position)
+{
+ const uint32_t pos_state = position & coder->pos_mask;
+
+ if (back == (4294967295U)) {
+ rc_bit(&coder->rc,
+ &coder->is_match[coder->state][pos_state], 0); /* is_match bit 0: literal */
+ literal(coder, mf, position);
+ } else {
+ rc_bit(&coder->rc,
+ &coder->is_match[coder->state][pos_state], 1); /* is_match bit 1: rep match or match */
+
+ if (back < 4) {
+ rc_bit(&coder->rc, &coder->is_rep[coder->state], 1);
+ rep_match(coder, pos_state, back, len);
+ } else {
+ rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
+ match(coder, pos_state, back - 4, len);
+ }
+ }
+ mf->read_ahead -= len; /* the encoded bytes no longer count as read ahead */
+}
+/* Queue a match with distance 4294967295U and length 2; lzma_lzma_encode() calls this before flushing when its limit is 4294967295U. */
+static void
+encode_eopm(lzma_lzma1_encoder *coder, uint32_t position)
+{
+ const uint32_t pos_state = position & coder->pos_mask;
+ rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); /* same two bits encode_symbol() queues before a match */
+ rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
+ match(coder, pos_state, (4294967295U), 2);
+}
+/* Queue five RC_FLUSH symbols; rc_encode() performs one rc_shift_low() call for each of them. */
+static inline void
+rc_flush(lzma_range_encoder *rc)
+{
+ for (size_t i = 0; i < 5; ++i)
+ rc->symbols[rc->count++] = RC_FLUSH;
+}
+/* Local declaration of exit(), which lzma_lzma_encode() calls on its non-fast-mode path. */
+extern void exit (int __status)
+ __attribute__ ((__nothrow__ , __leaf__ , __noreturn__));
+/* Main encode loop: pick symbols, queue them and drain the range coder into out[]; returns LZMA_OK when it must be called again, LZMA_STREAM_END when done. */
+extern lzma_ret
+lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
+ uint8_t *restrict out, size_t *restrict out_pos,
+ size_t out_size, uint32_t limit)
+{
+
+ if (!coder->is_initialized && !encode_init(coder, mf)) /* first call; encode_init() returns 0 when no input is available yet */
+ return LZMA_OK;
+
+ uint32_t position = mf_position(mf);
+
+ while (1) {
+ if (rc_encode(&coder->rc, out, out_pos, out_size)) { /* out[] is full; queued symbols stay pending */
+ return LZMA_OK;
+ }
+
+ if (limit != (4294967295U)
+ && (mf->read_pos - mf->read_ahead >= limit
+ || *out_pos + rc_pending(&coder->rc)
+ >= (1U << 16) - ((1 << 12) + 1))) /* with a limit set: stop at the input limit or when the output nears 65536 - 4097 bytes */
+ break;
+
+ if (mf->read_pos >= mf->read_limit) { /* no more input available right now */
+ if (mf->action == LZMA_RUN)
+ return LZMA_OK;
+
+
+ if (mf->read_ahead == 0) /* everything read so far has been encoded */
+ break;
+ }
+ uint32_t len;
+ uint32_t back;
+
+ if (coder->fast_mode)
+ lzma_lzma_optimum_fast(coder, mf, &back, &len);
+ else
+ // The original code contains the call to
+ // lzma_lzma_optimum_normal(coder, mf, &back, &len, position);
+ exit (-1);
+
+ encode_symbol(coder, mf, back, len, position);
+
+ position += len;
+ }
+
+ if (!coder->is_flushed) {
+ coder->is_flushed = 1; /* set first so a retry after a full out[] does not queue the flush again */
+ if (limit == (4294967295U))
+ encode_eopm(coder, position);
+
+ rc_flush(&coder->rc);
+
+ if (rc_encode(&coder->rc, out, out_pos, out_size)) {
+ return LZMA_OK;
+ }
+ }
+
+ coder->is_flushed = 0;
+ return LZMA_STREAM_END;
+}
+/* Release `ptr` with allocator->free when an allocator with a free callback is supplied, otherwise with free(). */
+extern void
+lzma_free(void *ptr, const lzma_allocator *allocator)
+{
+ if (allocator != ((void *)0) && allocator->free != ((void *)0))
+ allocator->free(allocator->opaque, ptr); /* the callback receives allocator->opaque as its first argument */
+ else
+ free(ptr);
+ return;
+}
+/* Validate lz_options and set up the sizes, callbacks and limits in *mf, releasing old buffers whose size changed; returns 1 on error, 0 on success. */
+static _Bool
+lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
+ const lzma_lz_options *lz_options)
+{
+ if (lz_options->dict_size < 4096U
+ || lz_options->dict_size
+ > (1U << 30) + (1U << 29)
+ || lz_options->nice_len > lz_options->match_len_max) /* dict_size must be within 4096 .. 2^30 + 2^29 and nice_len <= match_len_max */
+ return 1;
+
+ mf->keep_size_before = lz_options->before_size + lz_options->dict_size;
+ mf->keep_size_after = lz_options->after_size
+ + lz_options->match_len_max;
+ uint32_t reserve = lz_options->dict_size / 2;
+ if (reserve > (1U << 30))
+ reserve /= 2;
+
+ reserve += (lz_options->before_size + lz_options->match_len_max
+ + lz_options->after_size) / 2 + (1U << 19);
+
+ const uint32_t old_size = mf->size;
+ mf->size = mf->keep_size_before + reserve + mf->keep_size_after;
+
+ if ((mf->buffer != ((void *)0)) && old_size != mf->size) { /* an existing buffer of a different size is released */
+ lzma_free(mf->buffer, allocator);
+ mf->buffer = ((void *)0);
+ }
+
+ mf->match_len_max = lz_options->match_len_max;
+ mf->nice_len = lz_options->nice_len;
+ mf->cyclic_size = lz_options->dict_size + 1;
+
+ switch (lz_options->match_finder) { /* install the find/skip pair for the selected match finder */
+ case LZMA_MF_HC3:
+ mf->find = &lzma_mf_hc3_find;
+ mf->skip = &lzma_mf_hc3_skip;
+ break;
+
+ case LZMA_MF_HC4:
+ mf->find = &lzma_mf_hc4_find;
+ mf->skip = &lzma_mf_hc4_skip;
+ break;
+
+ case LZMA_MF_BT2:
+ mf->find = &lzma_mf_bt2_find;
+ mf->skip = &lzma_mf_bt2_skip;
+ break;
+
+ case LZMA_MF_BT3:
+ mf->find = &lzma_mf_bt3_find;
+ mf->skip = &lzma_mf_bt3_skip;
+ break;
+
+ case LZMA_MF_BT4:
+ mf->find = &lzma_mf_bt4_find;
+ mf->skip = &lzma_mf_bt4_skip;
+ break;
+
+ default: /* unknown match finder id */
+ return 1;
+ }
+
+ const uint32_t hash_bytes = lz_options->match_finder & 0x0F; /* low nibble of the match finder id */
+ if (hash_bytes > mf->nice_len)
+ return 1;
+
+ const _Bool is_bt = (lz_options->match_finder & 0x10) != 0; /* bit 0x10 of the match finder id */
+ uint32_t hs;
+
+ if (hash_bytes == 2) {
+ hs = 0xFFFF;
+ } else {
+ hs = lz_options->dict_size - 1;
+ hs |= hs >> 1; /* copy set bits into lower bit positions */
+ hs |= hs >> 2;
+ hs |= hs >> 4;
+ hs |= hs >> 8;
+ hs >>= 1;
+ hs |= 0xFFFF; /* the mask is never smaller than 16 bits */
+
+ if (hs > (1U << 24)) {
+ if (hash_bytes == 3)
+ hs = (1U << 24) - 1;
+ else
+ hs >>= 1;
+ }
+ }
+
+ mf->hash_mask = hs;
+
+ ++hs; /* from mask to entry count */
+ if (hash_bytes > 2)
+ hs += (1U << 10); /* room for the 2-byte hash table */
+ if (hash_bytes > 3)
+ hs += (1U << 16); /* room for the 3-byte hash table */
+
+ const uint32_t old_hash_count = mf->hash_count;
+ const uint32_t old_sons_count = mf->sons_count;
+ mf->hash_count = hs;
+ mf->sons_count = mf->cyclic_size;
+ if (is_bt)
+ mf->sons_count *= 2; /* the bt_* functions use two son[] slots per position */
+
+ if (old_hash_count != mf->hash_count
+ || old_sons_count != mf->sons_count) { /* counts changed: release both tables */
+ lzma_free(mf->hash, allocator);
+ mf->hash = ((void *)0);
+
+ lzma_free(mf->son, allocator);
+ mf->son = ((void *)0);
+ }
+
+ mf->depth = lz_options->depth;
+ if (mf->depth == 0) { /* depth 0 selects a default derived from nice_len */
+ if (is_bt)
+ mf->depth = 16 + mf->nice_len / 2;
+ else
+ mf->depth = 4 + mf->nice_len / 4;
+ }
+
+ return 0;
+}
+/* Test driver: calls lz_encoder_prepare() and lzma_lzma_encode() so both are reachable; NOTE(review): all locals are uninitialized, presumably compile-only - confirm. */
+int
+main ()
+{
+ lzma_mf mf;
+ lzma_allocator allocator;
+ lzma_lz_options lz_options;
+
+ void *coder;
+ uint8_t *restrict out;
+ size_t *restrict out_pos;
+ size_t out_size;
+
+ lz_encoder_prepare(&mf, &allocator, &lz_options); /* return value is ignored */
+ return (int) lzma_lzma_encode(coder, &mf, out, out_pos, out_size, (4294967295U)); /* limit 4294967295U disables the limit checks in the encode loop */
+}
+
+
+/* { dg-final { scan-wpa-ipa-dump "Save results of indirect call analysis." "icp"} } */
+/* { dg-final { scan-wpa-ipa-dump-times "For call" 2 "icp"} } */
+/* { dg-final { scan-wpa-ipa-dump-times "Insert 0 prefetch stmt:" 5 "ipa_prefetch"} } */
+/* { dg-final { scan-wpa-ipa-dump-times "Insert 1 prefetch stmt:" 4 "ipa_prefetch"} } */
+/* { dg-final { scan-wpa-ipa-dump-times "Insert 2 prefetch stmt:" 2 "ipa_prefetch"} } */
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化