加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
stardict.cc 71.78 KB
一键复制 编辑 原始数据 按行查看 历史
Boyuan Yang 提交于 2018-07-07 17:33 . Fix typos found by codespell
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "stardict.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "chunkedstorage.hh"
#include "dictzip.h"
#include "xdxf2html.hh"
#include "htmlescape.hh"
#include "langcoder.hh"
#include "gddebug.hh"
#include "fsencoding.hh"
#include "filetype.hh"
#include "indexedzip.hh"
#include "tiff.hh"
#include "ftshelpers.hh"
#include "wstring_qt.hh"
#include "audiolink.hh"
#include <zlib.h>
#include <map>
#include <set>
#include <string>
#ifndef __WIN32
#include <arpa/inet.h>
#else
#include <winsock.h>
#endif
#include <stdlib.h>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include <QString>
#include <QSemaphore>
#include <QThreadPool>
#include <QAtomicInt>
#include <QDebug>
#include <QRegExp>
#include <QStringList>
#include <QDomDocument>
#include <QDomNode>
#include "ufile.hh"
#include "qt4x5.hh"
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
#include <QRegularExpression>
#endif
namespace Stardict {
using std::map;
using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;
namespace {
// Exceptions raised while loading or indexing a Stardict dictionary. All of
// them derive from Dictionary::Ex. The DEF_EX_STR ones are constructed with an
// extra string argument (see how exDictzipError is thrown in the constructor
// of StardictDictionary) -- presumably appended to the fixed message.
DEF_EX( exNotAnIfoFile, "Not an .ifo file", Dictionary::Ex )
DEF_EX_STR( exBadFieldInIfo, "Bad field in .ifo file encountered:", Dictionary::Ex )
DEF_EX_STR( exNoIdxFile, "No corresponding .idx file was found for", Dictionary::Ex )
DEF_EX_STR( exNoDictFile, "No corresponding .dict file was found for", Dictionary::Ex )
DEF_EX_STR( exNoSynFile, "No corresponding .syn file was found for", Dictionary::Ex )
DEF_EX( ex64BitsNotSupported, "64-bit indices are not presently supported, sorry", Dictionary::Ex )
DEF_EX( exDicttypeNotSupported, "Dictionaries with dicttypes are not supported, sorry", Dictionary::Ex )
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
// Message spelling fixed: was "Enountered".
DEF_EX_STR( exWordIsTooLarge, "Encountered a word that is too large:", Dictionary::Ex )
DEF_EX_STR( exSuddenEndOfFile, "Sudden end of file", Dictionary::Ex )
DEF_EX_STR( exDictzipError, "DICTZIP error", Dictionary::Ex )
DEF_EX_STR( exIncorrectOffset, "Incorrect offset encountered in file", Dictionary::Ex )
/// Contents of an ifo file.
/// The single constructor takes the already opened file; its definition is
/// not part of this declaration.
struct Ifo
{
  // Textual identification of the dictionary
  string version;
  string bookname;

  // Numeric fields
  uint32_t wordcount;
  uint32_t synwordcount;
  uint32_t idxfilesize;
  uint32_t idxoffsetbits;

  // Content description
  string sametypesequence;
  string dicttype;
  string description;

  // Authorship and provenance
  string copyright;
  string author;
  string email;
  string website;
  string date;

  Ifo( File::Class & );
};
// Constants identifying our own index file. indexIsOldOrBad() compares both of
// them against the corresponding fields of the IdxHeader read from disk.
enum
{
Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian
// Combines a local revision number with the versions of the btree index and
// folding code, so a change in any of the three yields a different value.
CurrentFormatVersion = 9 + BtreeIndexing::FormatVersion + Folding::Version
};
// Header record stored at the very beginning of the index file. It is read
// back as a raw struct (idx.read< IdxHeader >() in the StardictDictionary
// constructor, idx.readRecords() in indexIsOldOrBad()), so the field order and
// sizes here define the on-disk layout. The book name and the sametypesequence
// strings are read from the file right after it, using the sizes stored below.
struct IdxHeader
{
uint32_t signature; // First comes the signature, SIDX
uint32_t formatVersion; // File format version (CurrentFormatVersion)
uint32_t chunksOffset; // The offset to chunks' storage
uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
uint32_t indexRootOffset;
uint32_t wordCount; // Saved from Ifo::wordcount
uint32_t synWordCount; // Saved from Ifo::synwordcount
uint32_t bookNameSize; // Book name's length. Used to read it then.
uint32_t sameTypeSequenceSize; // That string's size. Used to read it then.
uint32_t langFrom; // Source language
uint32_t langTo; // Target language
uint32_t hasZipFile; // Non-zero means there's a zip file with resources present
uint32_t zipIndexBtreeMaxElements; // Two fields from IndexInfo of the zip
// resource index.
uint32_t zipIndexRootOffset;
}
// The packed attribute is only applied when not building with MSVC.
#ifndef _MSC_VER
__attribute__((packed))
#endif
;
/// Tells whether the index file 'indexFile' can't be used as is.
/// Returns true when a full IdxHeader can't be read from it, when the
/// signature doesn't match, or when the stored format version differs from
/// CurrentFormatVersion. Returns false otherwise.
bool indexIsOldOrBad( string const & indexFile )
{
  File::Class indexStream( indexFile, "rb" );

  IdxHeader stored;

  // A file too short to hold even the header is certainly bad
  if ( indexStream.readRecords( &stored, sizeof( stored ), 1 ) != 1 )
    return true;

  // Not one of our index files at all
  if ( stored.signature != Signature )
    return true;

  // Ours, but possibly written by another version of the code
  return stored.formatVersion != CurrentFormatVersion;
}
/// A dictionary backed by a set of Stardict files and our own index file.
/// Headword lookup comes from BtreeIndexing::BtreeDictionary; this class adds
/// article loading from the .dict file, resource loading, and full-text
/// search hooks.
class StardictDictionary: public BtreeIndexing::BtreeDictionary
{
// Locked around chunk reads in getArticleProps() and handed to openIndex()
// together with idx.
Mutex idxMutex;
// Our index file, opened for reading by the constructor.
File::Class idx;
// Header read from the start of idx.
IdxHeader idxHeader;
// Both strings are read from idx right after the header, in this order.
string bookName;
string sameTypeSequence;
// Reader over the chunked storage located at idxHeader.chunksOffset in idx.
ChunkedStorage::Reader chunks;
// NOTE(review): presumably guards dz; its use is not visible in this part of
// the file.
Mutex dzMutex;
// Handle returned by dict_data_open() for the .dict file; closed in the
// destructor.
dictData * dz;
// NOTE(review): presumably guards resourceZip; its use is not visible in this
// part of the file.
Mutex resourceZipMutex;
// Optional zip file with resources; opened by the constructor when the header
// says there is one.
IndexedZip resourceZip;
public:
// 'indexFile' is our own index file; 'dictionaryFiles' are the dictionary's
// files (the constructor opens element 2 as the .dict file).
StardictDictionary( string const & id, string const & indexFile,
vector< string > const & dictionaryFiles );
~StardictDictionary();
// Returns the book name stored in the index.
virtual string getName() throw()
{ return bookName; }
// No properties are reported -- always an empty map.
virtual map< Dictionary::Property, string > getProperties() throw()
{ return map< Dictionary::Property, string >(); }
// Article count is the word count saved from the .ifo file.
virtual unsigned long getArticleCount() throw()
{ return idxHeader.wordCount; }
// Word count also includes the synonyms.
virtual unsigned long getWordCount() throw()
{ return idxHeader.wordCount + idxHeader.synWordCount; }
// Source and target languages, as stored in the index header.
inline virtual quint32 getLangFrom() const
{ return idxHeader.langFrom; }
inline virtual quint32 getLangTo() const
{ return idxHeader.langTo; }
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & )
THROW_SPEC( std::exception );
virtual sptr< Dictionary::DataRequest > getArticle( wstring const &,
vector< wstring > const & alts,
wstring const &,
bool ignoreDiacritics )
THROW_SPEC( std::exception );
virtual sptr< Dictionary::DataRequest > getResource( string const & name )
THROW_SPEC( std::exception );
virtual QString const& getDescription();
virtual QString getMainFilename();
virtual sptr< Dictionary::DataRequest > getSearchResults( QString const & searchString,
int searchMode, bool matchCase,
int distanceBetweenWords,
int maxResults,
bool ignoreWordsOrder,
bool ignoreDiacritics );
virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text );
virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration );
// Full-text search is allowed only if it's enabled globally, the "STARDICT"
// type isn't among the disabled ones, and the dictionary isn't larger than
// the configured maximum (0 means no size limit).
virtual void setFTSParameters( Config::FullTextSearch const & fts )
{
can_FTS = fts.enabled
&& !fts.disabledTypes.contains( "STARDICT", Qt::CaseInsensitive )
&& ( fts.maxDictionarySize == 0 || getArticleCount() <= fts.maxDictionarySize );
}
protected:
void loadIcon() throw();
private:
/// Retrieves the article's offset/size in .dict file, and its headword.
void getArticleProps( uint32_t articleAddress,
string & headword,
uint32_t & offset, uint32_t & size );
/// Loads the article, storing its headword and formatting the data it has
/// into an html.
void loadArticle( uint32_t address,
string & headword,
string & articleText );
/// Reads 'size' bytes from the current position of idx and returns them as
/// a string.
string loadString( size_t size );
/// Makes an html out of a single typed entry of an article.
string handleResource( char type, char const * resource, size_t size );
/// Converts Pango markup in 'text' to html, in place.
void pangoToHtml( QString & text );
friend class StardictResourceRequest;
friend class StardictArticleRequest;
friend class StardictHeadwordsRequest;
};
/// Opens the dictionary.
///
/// 'id' and 'dictionaryFiles' are passed on to BtreeDictionary; 'indexFile' is
/// our own index file. The header, the book name and the sametypesequence
/// string are read from the index sequentially in the initializer list, which
/// relies on the members being declared in that same order.
///
/// Throws exDictzipError if the .dict file (dictionaryFiles[ 2 ]) can't be
/// opened. Any exception raised by the later steps is propagated after the
/// .dict handle has been closed.
StardictDictionary::StardictDictionary( string const & id,
                                        string const & indexFile,
                                        vector< string > const & dictionaryFiles ):
  BtreeDictionary( id, dictionaryFiles ),
  idx( indexFile, "rb" ),
  idxHeader( idx.read< IdxHeader >() ),
  bookName( loadString( idxHeader.bookNameSize ) ),
  sameTypeSequence( loadString( idxHeader.sameTypeSequenceSize ) ),
  chunks( idx, idxHeader.chunksOffset )
{
  // Open the .dict file

  DZ_ERRORS error;
  dz = dict_data_open( dictionaryFiles[ 2 ].c_str(), &error, 0 );

  if ( !dz )
    throw exDictzipError( string( dz_error_str( error ) )
                          + "(" + dictionaryFiles[ 2 ] + ")" );

  // From here on dz is a raw handle we own. If the constructor exits with an
  // exception, the destructor won't be called, so it has to be closed by hand.
  try
  {
    // Initialize the index

    openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
                          idxHeader.indexRootOffset ),
               idx, idxMutex );

    // Open a resource zip file, if there's one

    if ( idxHeader.hasZipFile &&
         ( idxHeader.zipIndexBtreeMaxElements ||
           idxHeader.zipIndexRootOffset ) )
    {
      resourceZip.openIndex( IndexInfo( idxHeader.zipIndexBtreeMaxElements,
                                        idxHeader.zipIndexRootOffset ),
                             idx, idxMutex );

      // The zip, when present, is expected to be the last dictionary file
      QString zipName = QDir::fromNativeSeparators(
          FsEncoding::decode( getDictionaryFilenames().back().c_str() ) );

      if ( zipName.endsWith( ".zip", Qt::CaseInsensitive ) ) // Sanity check
        resourceZip.openZipFile( zipName );
    }

    // Full-text search parameters

    can_FTS = true;

    ftsIdxName = indexFile + "_FTS";

    if( !Dictionary::needToRebuildIndex( dictionaryFiles, ftsIdxName )
        && !FtsHelpers::ftsIndexIsOldOrBad( ftsIdxName, this ) )
      FTS_index_completed.ref();
  }
  catch( ... )
  {
    dict_data_close( dz );
    dz = 0;
    throw;
  }
}
/// Closes the .dict file handle, if there is one.
StardictDictionary::~StardictDictionary()
{
  if ( !dz )
    return; // Nothing was opened

  dict_data_close( dz );
}
void StardictDictionary::loadIcon() throw()
{
if ( dictionaryIconLoaded )
return;
QString fileName =
QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) );
// Remove the extension
fileName.chop( 3 );
if( !loadIconFromFile( fileName ) )
{
// Load failed -- use default icons
dictionaryNativeIcon = dictionaryIcon = QIcon(":/icons/icon32_stardict.png");
}
dictionaryIconLoaded = true;
}
/// Reads 'size' bytes from the current position of the index file and returns
/// them as a string. A zero 'size' yields an empty string without touching
/// the file.
string StardictDictionary::loadString( size_t size )
{
  // front() on an empty vector is undefined behaviour, and a zero size is
  // legitimate here (e.g. a dictionary with no sametypesequence).
  if ( size == 0 )
    return string();

  vector< char > data( size );

  idx.read( &data.front(), data.size() );

  return string( &data.front(), data.size() );
}
void StardictDictionary::getArticleProps( uint32_t articleAddress,
string & headword,
uint32_t & offset, uint32_t & size )
{
vector< char > chunk;
Mutex::Lock _( idxMutex );
char * articleData = chunks.getBlock( articleAddress, chunk );
memcpy( &offset, articleData, sizeof( uint32_t ) );
articleData += sizeof( uint32_t );
memcpy( &size, articleData, sizeof( uint32_t ) );
articleData += sizeof( uint32_t );
headword = articleData;
}
/// Converts KingSoft PowerWord entry data (Stardict's 'k' type) to html.
///
/// The data is parsed as XML; the text nodes found are collected in document
/// order, each one has its "&X{...}" markup translated, and the results are
/// joined with "<br>" inside a <div class="sdct_k">. If the data isn't
/// parseable as XML, it is placed into the div as is.
class PowerWordDataProcessor{
  /// A single translation rule: a regular expression and the text to
  /// substitute for its matches.
  class PWSyntaxTranslate{
  public:
    PWSyntaxTranslate(const char* re, const char* replacement)
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
      : _re(re, QRegularExpression::UseUnicodePropertiesOption )
#else
      : _re(re)
#endif
      , _replacement(replacement)
    {
    }
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
    const QRegularExpression & re() const {
#else
    const QRegExp& re() const {
#endif
      return _re;
    }
    const QString & replacement() const {
      return _replacement;
    }
  private:
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
    QRegularExpression _re;
#else
    QRegExp _re;
#endif
    QString _replacement;
  };
public:
  /// 'resource' points to 'size' bytes of utf8 data.
  PowerWordDataProcessor(const char* resource, size_t size)
    : _data(QString::fromUtf8(resource, size))
  {
  }

  /// Returns the resulting html, utf8-encoded.
  string process() {
    QDomDocument doc;
    QString ss;
    ss = "<div class=\"sdct_k\">";
    if (!doc.setContent(_data)) {
      // Not a well-formed document -- output the raw data
      ss += _data ;
    } else {
      QStringList sl;
      walkNode(doc.firstChild(), sl);
      QStringListIterator itr(sl);
      while (itr.hasNext()) {
        QString s = itr.next();
        translatePW(s);
        ss += s;
        ss += "<br>";
      }
    }
    ss += "</div>";
    QByteArray ba = ss.toUtf8();
    return string(ba.data(), ba.size());
  }
private:
  /// Appends the contents of every text node under 'e' (or of 'e' itself, if
  /// it is a text node) to 'sl', depth first.
  void walkNode(const QDomNode& e, QStringList& sl) {
    if (e.isNull()) {
      return;
    }
    if (e.isText()) {
      sl.append(e.toText().data());
    } else {
      QDomNodeList l = e.childNodes();
      for (int i = 0; i < l.size(); ++i) {
        QDomNode n = l.at(i);
        if (n.isText()) {
          sl.append(n.toText().data());
        } else {
          walkNode(n, sl);
        }
      }
    }
  }

  /// Translates PowerWord "&X{text}" markup in 's' to html, in place:
  /// b, i and u become <B>, <I> and <U>; l and 2 become a blue <SPAN>.
  /// Whatever markup is left unrecognized afterwards gets its "&X{" and "}"
  /// parts removed.
  void translatePW(QString& s){
    const int TRANSLATE_TBL_SIZE=5;
    static PWSyntaxTranslate t[TRANSLATE_TBL_SIZE]={
      PWSyntaxTranslate("&[bB]\\s*\\{([^\\{}&]+)\\}", "<B>\\1</B>"),
      PWSyntaxTranslate("&[iI]\\s*\\{([^\\{}&]+)\\}", "<I>\\1</I>"),
      PWSyntaxTranslate("&[uU]\\s*\\{([^\\{}&]+)\\}", "<U>\\1</U>"),
      PWSyntaxTranslate("&[lL]\\s*\\{([^\\{}&]+)\\}", "<SPAN style=\"color:#0000ff\">\\1</SPAN>"),
      PWSyntaxTranslate("&[2]\\s*\\{([^\\{}&]+)\\}", "<SPAN style=\"color:#0000ff\">\\1</SPAN>")
    };

    // The rules only match innermost markup (the body may not contain '{',
    // '}' or '&'), so nested markup needs several passes. Repeat until a pass
    // changes nothing. The snapshot must be taken BEFORE the pass: taking it
    // afterwards made the loop stop after the first pass unconditionally.
    // Every replacement removes a '{' and adds none, so this terminates.
    QString old;
    do {
      old = s;
      for (int i = 0; i < TRANSLATE_TBL_SIZE; ++i) {
        PWSyntaxTranslate& a = t[i];
        s.replace(a.re(), a.replacement());
      }
    } while (s.compare(old) != 0);

    // Strip what's left of the markup we don't know how to translate
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
    s.replace(QRegularExpression( "&.\\s*\\{",
                                  QRegularExpression::UseUnicodePropertiesOption
                                  | QRegularExpression::DotMatchesEverythingOption),
              "");
#else
    s.replace(QRegExp("&.\\s*\\{"), "");
#endif
    s.replace("}", "");
  }
private:
  QString _data;
};
/// This function tries to make an html of the Stardict's resource typed
/// 'type', contained in a block pointed to by 'resource', 'size' bytes long.
///
/// Every known type is wrapped into a <div class="sdct_X">, X being the type
/// character. For html ('h') entries relative img/link sources are redirected
/// to this dictionary's bres:// resources, local article links are rewritten
/// to bword: (or gdlookup:// when they carry an anchor), and <audio> tags
/// with local sources are turned into gdau:// play links. Unknown types yield
/// a short notice; the data itself is shown (escaped) only for lowercase,
/// i.e. textual, types.
string StardictDictionary::handleResource( char type, char const * resource, size_t size )
{
  QString text;
  switch( type )
  {
    case 'x': // Xdxf content
      return Xdxf2Html::convert( string( resource, size ), Xdxf2Html::STARDICT, NULL, this, &resourceZip );
    case 'h': // Html content
    {
      QString articleText = QString( "<div class=\"sdct_h\">" ) + QString::fromUtf8( resource, size ) + "</div>";

      // Point relative image and stylesheet references to our own resources.
      // References already having a data/http(s)/ftp scheme are left alone.
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
      QRegularExpression imgRe( "(<\\s*img\\s+[^>]*src\\s*=\\s*[\"']+)(?!(?:data|https?|ftp):)",
                                QRegularExpression::CaseInsensitiveOption
                                | QRegularExpression::InvertedGreedinessOption );
      QRegularExpression linkRe( "(<\\s*link\\s+[^>]*href\\s*=\\s*[\"']+)(?!(?:data|https?|ftp):)",
                                 QRegularExpression::CaseInsensitiveOption
                                 | QRegularExpression::InvertedGreedinessOption );
#else
      QRegExp imgRe( "(<\\s*img\\s+[^>]*src\\s*=\\s*[\"']+)(?!(?:data|https?|ftp):)", Qt::CaseInsensitive );
      imgRe.setMinimal( true );
      QRegExp linkRe( "(<\\s*link\\s+[^>]*href\\s*=\\s*[\"']+)(?!(?:data|https?|ftp):)", Qt::CaseInsensitive );
      linkRe.setMinimal( true );
#endif

      articleText.replace( imgRe , "\\1bres://" + QString::fromStdString( getId() ) + "/" )
                 .replace( linkRe, "\\1bres://" + QString::fromStdString( getId() ) + "/" );

      // Handle links to articles

#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
      QRegularExpression linksReg( "<a(\\s*[^>]*)href\\s*=\\s*['\"](bword://)?([^'\"]+)['\"]",
                                   QRegularExpression::CaseInsensitiveOption );
#else
      QRegExp linksReg( "<a(\\s*[^>]*)href\\s*=\\s*['\"](bword://)?([^'\"]+)['\"]", Qt::CaseInsensitive );
      linksReg.setMinimal( true );
#endif

      // With Qt 5 the new text is assembled in articleNewText piece by piece,
      // 'pos' being the end of the last match. With Qt 4 articleText is
      // patched in place, 'pos' being the position to search from.
      int pos = 0;
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
      QString articleNewText;
      QRegularExpressionMatchIterator it = linksReg.globalMatch( articleText );
      while( it.hasNext() )
      {
        QRegularExpressionMatch match = it.next();
        articleNewText += articleText.midRef( pos, match.capturedStart() - pos );
        pos = match.capturedEnd();

        QString link = match.captured( 3 );
#else
      while( pos >= 0 )
      {
        pos = linksReg.indexIn( articleText, pos );
        if( pos < 0 )
          break;
        QString link = linksReg.cap( 3 );
#endif

        // Only links with no scheme in them are treated as article links
        if( link.indexOf( ':' ) < 0 )
        {
          QString newLink;
          if( link.indexOf( '#' ) < 0 )
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
            newLink = QString( "<a" ) + match.captured( 1 ) + "href=\"bword:" + link + "\"";
#else
            newLink = QString( "<a" ) + linksReg.cap( 1 ) + "href=\"bword:" + link + "\"";
#endif

          // Anchors

          if( link.indexOf( '#' ) > 0 )
          {
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
            newLink = QString( "<a" ) + match.captured( 1 ) + "href=\"gdlookup://localhost/" + link + "\"";
#else
            newLink = QString( "<a" ) + linksReg.cap( 1 ) + "href=\"gdlookup://localhost/" + link + "\"";
#endif
            newLink.replace( "#", "?gdanchor=" );
          }

          // newLink stays empty for links starting with '#', which are
          // therefore kept unchanged
          if( !newLink.isEmpty() )
          {
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
            articleNewText += newLink;
#else
            articleText.replace( pos, linksReg.cap( 0 ).size(), newLink );
            pos += newLink.size();
#endif
          }
          else
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
            articleNewText += match.captured();
#else
            pos += linksReg.cap( 0 ).size();
#endif
        }
        else
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
          articleNewText += match.captured();
      }
      // A non-zero pos means there was at least one match: append the tail
      // and take the assembled text as the new article text
      if( pos )
      {
        articleNewText += articleText.midRef( pos );
        articleText = articleNewText;
        articleNewText.clear();
      }
#else
          pos += linksReg.cap( 0 ).size();
      }
#endif

      // Handle "audio" tags

#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
      QRegularExpression audioRe( "<\\s*audio\\s*src\\s*=\\s*([\"']+)([^\"']+)([\"'])\\s*>(.*)</audio>",
                                  QRegularExpression::CaseInsensitiveOption
                                  | QRegularExpression::DotMatchesEverythingOption
                                  | QRegularExpression::InvertedGreedinessOption );
#else
      QRegExp audioRe( "<\\s*audio\\s*src\\s*=\\s*([\"']+)([^\"']+)([\"'])\\s*>(.*)</audio>", Qt::CaseInsensitive );
      audioRe.setMinimal( true );
#endif

      pos = 0;
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
      it = audioRe.globalMatch( articleText );
      while( it.hasNext() )
      {
        QRegularExpressionMatch match = it.next();
        articleNewText += articleText.midRef( pos, match.capturedStart() - pos );
        pos = match.capturedEnd();

        QString src = match.captured( 2 );
#else
      while( pos >= 0 )
      {
        pos = audioRe.indexIn( articleText, pos );
        if( pos < 0 )
          break;
        QString src = audioRe.cap( 2 );
#endif

        // Sources having a scheme are left untouched
        if( src.indexOf( "://" ) >= 0 )
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
          articleNewText += match.captured();
#else
          pos += audioRe.cap( 0 ).length();
#endif
        else
        {
          std::string href = "\"gdau://" + getId() + "/" + src.toUtf8().data() + "\"";
          QString newTag = QString::fromUtf8( ( addAudioLink( href, getId() ) + "<span class=\"sdict_h_wav\"><a href=" + href + ">" ).c_str() );
          // Keep the tag's original contents, adding a play icon if they
          // have no image of their own
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
          newTag += match.captured( 4 );
          if( match.captured( 4 ).indexOf( "<img " ) < 0 )
#else
          newTag += audioRe.cap( 4 );
          if( audioRe.cap( 4 ).indexOf( "<img " ) < 0 )
#endif
            newTag += " <img src=\"qrcx://localhost/icons/playsound.png\" border=\"0\" alt=\"Play\">";
          newTag += "</a></span>";

#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
          articleNewText += newTag;
#else
          articleText.replace( pos, audioRe.cap( 0 ).length(), newTag );
          pos += newTag.length();
#endif
        }
      }
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
      if( pos )
      {
        articleNewText += articleText.midRef( pos );
        articleText = articleNewText;
        articleNewText.clear();
      }
#endif

      return ( articleText.toUtf8().data() );
    }
    case 'm': // Pure meaning, usually means preformatted text
      return "<div class=\"sdct_m\">" + Html::preformat( string( resource, size ), isToLanguageRTL() ) + "</div>";
    case 'l': // Same as 'm', but not in utf8, instead in current locale's
              // encoding.
              // We just use Qt here, it should know better about system's
              // locale.
      return "<div class=\"sdct_l\">" + Html::preformat( QString::fromLocal8Bit( resource, size ).toUtf8().data(),
                                                         isToLanguageRTL() )
                                      + "</div>";
    case 'g': // Pango markup.
      text = QString::fromUtf8( resource, size );
      pangoToHtml( text );
      return "<div class=\"sdct_g\">" + string( text.toUtf8().data() ) + "</div>";
    case 't': // Transcription
      return "<div class=\"sdct_t\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'y': // Chinese YinBiao or Japanese KANA. Examples are needed. For now,
              // just output as pure escaped utf8.
      return "<div class=\"sdct_y\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'k': // KingSoft PowerWord data.
    {
      PowerWordDataProcessor pwdp(resource, size);
      return pwdp.process();
    }
    case 'w': // MediaWiki markup. We don't handle this right now.
      return "<div class=\"sdct_w\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'n': // WordNet data. We don't know anything about it.
      return "<div class=\"sdct_n\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'r': // Resource file list. For now, resources aren't handled.
      return "<div class=\"sdct_r\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'W': // An embedded Wav file. Unhandled yet.
      return "<div class=\"sdct_W\">(an embedded .wav file)</div>";
    case 'P': // An embedded picture file. Unhandled yet.
      return "<div class=\"sdct_P\">(an embedded picture file)</div>";
  }

  // Unknown type. Lowercase types are textual, so their data can be shown.
  // The cast matters: passing a negative char (a byte >= 0x80 where char is
  // signed) to islower() is undefined behaviour.
  if ( islower( static_cast< unsigned char >( type ) ) )
  {
    return string( "<b>Unknown textual entry type " ) + string( 1, type ) + ":</b> " + Html::escape( string( resource, size ) ) + "<br>";
  }
  else
    return string( "<b>Unknown blob entry type " ) + string( 1, type ) + "</b><br>";
}
void StardictDictionary::pangoToHtml( QString & text )
{
/*
* Partially support for Pango Markup Language
* Attributes "fallback", "lang", "gravity", "gravity_hint" just ignored
*/
QRegExp spanRegex( "<span\\s*([^>]*)>", Qt::CaseInsensitive );
QRegExp styleRegex( "(\\w+)=\"([^\"]*)\"" );
text.replace( "\n", "<br>" );
int pos = 0;
do
{
pos = spanRegex.indexIn( text, pos );
if( pos >= 0 )
{
QString styles = spanRegex.cap( 1 );
QString newSpan( "<span style=\"" );
int stylePos = 0;
do
{
stylePos = styleRegex.indexIn( styles, stylePos );
QString style = styleRegex.cap( 1 );
if( stylePos >= 0 )
{
if( style.compare( "font_desc", Qt::CaseInsensitive ) == 0
|| style.compare( "font", Qt::CaseInsensitive ) == 0 )
{
// Parse font description
QStringList list = styleRegex.cap( 2 ).split( " ", QString::SkipEmptyParts );
int n;
QString sizeStr, stylesStr, familiesStr;
for( n = list.size() - 1; n >= 0; n-- )
{
QString str = list.at( n );
// font size
if( str[ 0 ].isNumber() )
{
sizeStr = QString( "font-size:" ) + str + ";";
continue;
}
// font style
if( str.compare( "normal", Qt::CaseInsensitive ) == 0
|| str.compare( "oblique", Qt::CaseInsensitive ) == 0
|| str.compare( "italic", Qt::CaseInsensitive ) == 0 )
{
if( !stylesStr.contains( "font-style:" ) )
stylesStr += QString( "font-style:" ) + str + ";";
continue;
}
// font variant
if( str.compare( "smallcaps", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-variant:small-caps" ) ;
continue;
}
// font weight
if( str.compare( "ultralight", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-weight:100;" );
continue;
}
if( str.compare( "light", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-weight:200;" );
continue;
}
if( str.compare( "bold", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-weight:bold;" );
continue;
}
if( str.compare( "ultrabold", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-weight:800;" );
continue;
}
if( str.compare( "heavy", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-weight:900" );
continue;
}
// font stretch
if( str.compare( "ultracondensed", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-stretch:ultra-condensed;" );
continue;
}
if( str.compare( "extracondensed", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-stretch:extra-condensed;" );
continue;
}
if( str.compare( "semicondensed", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-stretch:semi-condensed;" );
continue;
}
if( str.compare( "semiexpanded", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-stretch:semi-expanded;" );
continue;
}
if( str.compare( "extraexpanded", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-stretch:extra-expanded;" );
continue;
}
if( str.compare( "ultraexpanded", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-stretch:ultra-expanded;" );
continue;
}
if( str.compare( "condensed", Qt::CaseInsensitive ) == 0
|| str.compare( "expanded", Qt::CaseInsensitive ) == 0 )
{
stylesStr += QString( "font-stretch:" ) + str + ";";
continue;
}
// gravity
if( str.compare( "south", Qt::CaseInsensitive ) == 0
|| str.compare( "east", Qt::CaseInsensitive ) == 0
|| str.compare( "north", Qt::CaseInsensitive ) == 0
|| str.compare( "west", Qt::CaseInsensitive ) == 0
|| str.compare( "auto", Qt::CaseInsensitive ) == 0 )
{
continue;
}
break;
}
// last words is families list
if( n >= 0 )
{
familiesStr = QString( "font-family:" );
for( int i = 0; i <= n; i++ )
{
if( i > 0 && !familiesStr.endsWith( ',' ) )
familiesStr += ",";
familiesStr += list.at( i );
}
familiesStr += ";";
}
newSpan += familiesStr + stylesStr + sizeStr;
}
else if( style.compare( "font_family", Qt::CaseInsensitive ) == 0
|| style.compare( "face", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-family:" ) + styleRegex.cap( 2 ) + ";";
else if( style.compare( "font_size", Qt::CaseInsensitive ) == 0
|| style.compare( "size", Qt::CaseInsensitive ) == 0 )
{
if( styleRegex.cap( 2 )[ 0 ].isLetter()
|| styleRegex.cap( 2 ).endsWith( "px", Qt::CaseInsensitive )
|| styleRegex.cap( 2 ).endsWith( "pt", Qt::CaseInsensitive )
|| styleRegex.cap( 2 ).endsWith( "em", Qt::CaseInsensitive )
|| styleRegex.cap( 2 ).endsWith( "%" ) )
newSpan += QString( "font-size:" ) + styleRegex.cap( 2 ) +";";
else
{
int size = styleRegex.cap( 2 ).toInt();
if( size )
newSpan += QString( "font-size:%1pt;" ).arg( size / 1024.0, 0, 'f', 3 );
}
}
else if( style.compare( "font_style", Qt::CaseInsensitive ) == 0
|| style.compare( "style", Qt::CaseInsensitive ) == 0)
newSpan += QString( "font-style:" ) + styleRegex.cap( 2 ) + ";";
else if( style.compare( "weight", Qt::CaseInsensitive ) == 0
|| style.compare( "weight", Qt::CaseInsensitive ) == 0)
{
QString str = styleRegex.cap( 2 );
if( str.compare( "ultralight", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-weight:100;" );
else if( str.compare( "light", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-weight:200;" );
else if( str.compare( "ultrabold", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-weight:800;" );
else if( str.compare( "heavy", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-weight:900" );
else
newSpan += QString( "font-weight:" ) + str + ";";
}
else if( style.compare( "font_variant", Qt::CaseInsensitive ) == 0
|| style.compare( "variant", Qt::CaseInsensitive ) == 0 )
{
if( styleRegex.cap( 2 ).compare( "smallcaps", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-variant:small-caps" );
else
newSpan += QString( "font-variant:" ) + styleRegex.cap( 2 ) + ";";
}
else if( style.compare( "font_stretch", Qt::CaseInsensitive ) == 0
|| style.compare( "stretch", Qt::CaseInsensitive ) == 0 )
{
QString str = styleRegex.cap( 2 );
if( str.compare( "ultracondensed", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-stretch:ultra-condensed;" );
else if( str.compare( "extracondensed", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-stretch:extra-condensed;" );
else if( str.compare( "semicondensed", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-stretch:semi-condensed;" );
else if( str.compare( "semiexpanded", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-stretch:semi-expanded;" );
else if( str.compare( "extraexpanded", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-stretch:extra-expanded;" );
else if( str.compare( "ultraexpanded", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "font-stretch:ultra-expanded;" );
else
newSpan += QString( "font-stretch:" ) + str + ";";
}
else if( style.compare( "foreground", Qt::CaseInsensitive ) == 0
|| style.compare( "fgcolor", Qt::CaseInsensitive ) == 0
|| style.compare( "color", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "color:" ) + styleRegex.cap( 2 ) + ";";
else if( style.compare( "background", Qt::CaseInsensitive ) == 0
|| style.compare( "bgcolor", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "background-color:" ) + styleRegex.cap( 2 ) + ";";
else if( style.compare( "underline_color", Qt::CaseInsensitive ) == 0
|| style.compare( "strikethrough_color", Qt::CaseInsensitive ) == 0 )
newSpan += QString( "text-decoration-color:" ) + styleRegex.cap( 2 ) + ";";
else if( style.compare( "underline", Qt::CaseInsensitive ) == 0 )
{
if( styleRegex.cap( 2 ).compare( "none", Qt::CaseInsensitive ) )
newSpan += QString( "text-decoration-line:none;" );
else
{
newSpan += QString( "text-decoration-line:underline; " );
if( styleRegex.cap( 2 ).compare( "low", Qt::CaseInsensitive ) )
newSpan += QString( "text-decoration-style:dotted;" );
else if( styleRegex.cap( 2 ).compare( "single", Qt::CaseInsensitive ) )
newSpan += QString( "text-decoration-style:solid;" );
else if( styleRegex.cap( 2 ).compare( "error", Qt::CaseInsensitive ) )
newSpan += QString( "text-decoration-style:wavy;" );
else
newSpan += QString( "text-decoration-style:" ) + styleRegex.cap( 2 ) + ";";
}
}
else if( style.compare( "strikethrough", Qt::CaseInsensitive ) == 0 )
{
if( styleRegex.cap( 2 ).compare( "true", Qt::CaseInsensitive ) )
newSpan += QString( "text-decoration-line:line-through;" );
else
newSpan += QString( "text-decoration-line:none;" );
}
else if( style.compare( "rise", Qt::CaseInsensitive ) == 0 )
{
if( styleRegex.cap( 2 ).endsWith( "px", Qt::CaseInsensitive )
|| styleRegex.cap( 2 ).endsWith( "pt", Qt::CaseInsensitive )
|| styleRegex.cap( 2 ).endsWith( "em", Qt::CaseInsensitive )
|| styleRegex.cap( 2 ).endsWith( "%" ) )
newSpan += QString( "vertical-align:" ) + styleRegex.cap( 2 ) +";";
else
{
int riseValue = styleRegex.cap( 2 ).toInt();
if( riseValue )
newSpan += QString( "vertical-align:%1pt;" ).arg( riseValue / 1024.0, 0, 'f', 3 );
}
}
else if( style.compare( "letter_spacing", Qt::CaseInsensitive ) == 0 )
{
if( styleRegex.cap( 2 ).endsWith( "px", Qt::CaseInsensitive )
|| styleRegex.cap( 2 ).endsWith( "pt", Qt::CaseInsensitive )
|| styleRegex.cap( 2 ).endsWith( "em", Qt::CaseInsensitive )
|| styleRegex.cap( 2 ).endsWith( "%" ) )
newSpan += QString( "letter-spacing:" ) + styleRegex.cap( 2 ) +";";
else
{
int spacing = styleRegex.cap( 2 ).toInt();
if( spacing )
newSpan += QString( "letter-spacing:%1pt;" ).arg( spacing / 1024.0, 0, 'f', 3 );
}
}
stylePos += styleRegex.matchedLength();
}
}
while( stylePos >= 0 );
newSpan += "\">";
text.replace( pos, spanRegex.matchedLength(), newSpan );
pos += newSpan.size();
}
}
while( pos >= 0 );
text.replace( " ", "&nbsp;&nbsp;" );
}
/// Loads one article and converts its entries to text.
///
/// @param address      Address of the article record, handed to
///                     getArticleProps() to obtain the headword and the
///                     offset/size of the article data.
/// @param headword     Receives the headword filled in by getArticleProps().
/// @param articleText  Receives the concatenated results of handleResource()
///                     for every entry of the article. If the article data
///                     cannot be read, receives an inline error message
///                     instead (no exception is thrown for that case).
///
/// Malformed articles are reported through gdWarning() and parsing stops at
/// the first bad entry; whatever was converted up to that point is kept.
void StardictDictionary::loadArticle( uint32_t address,
                                      string & headword,
                                      string & articleText )
{
  uint32_t offset, size;

  getArticleProps( address, headword, offset, size );

  char * articleBody;

  {
    Mutex::Lock _( dzMutex );

    // Note that the function always zero-pads the result.
    articleBody = dict_data_read_( dz, offset, size, 0, 0 );
  }

  if ( !articleBody )
  {
    // The read failure is reported inside the article rather than thrown.
    articleText = string( "<div class=\"sdict_m\">DICTZIP error: " ) + dict_error_str( dz ) + "</div>";
    return;
  }

  try
  {
    articleText.clear();

    char * ptr = articleBody;

    if ( sameTypeSequence.size() )
    {
      /// The sequence is known, it's not stored in the article itself

      for( unsigned seq = 0; seq < sameTypeSequence.size(); ++seq )
      {
        // Last entry doesn't have size info -- it is inferred from
        // the bytes left

        bool const entrySizeKnown = ( seq == sameTypeSequence.size() - 1 );

        uint32_t entrySize = 0;

        if ( entrySizeKnown )
          entrySize = size;
        else
        if ( !size )
        {
          gdWarning( "Stardict: short entry for the word %s encountered in \"%s\".\n", headword.c_str(), getName().c_str() );
          break;
        }

        char const type = sameTypeSequence[ seq ];

        // The <cctype> classifiers require a value representable as
        // unsigned char, so convert before classifying.
        unsigned char const typeCode = static_cast< unsigned char >( type );

        if ( islower( typeCode ) )
        {
          // Zero-terminated entry, unless it's the last one

          if ( !entrySizeKnown )
            entrySize = strlen( ptr );

          if ( size < entrySize )
          {
            gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n", headword.c_str(), getName().c_str() );
            break;
          }

          articleText += handleResource( type, ptr, entrySize );

          // Skip the terminating zero byte as well, but only when it is part
          // of the article data. If the text filled all remaining bytes, the
          // terminator found was the padding and consuming it would make
          // 'size' wrap around.
          uint32_t consumed = entrySize;

          if ( !entrySizeKnown && consumed < size )
            ++consumed;

          ptr += consumed;
          size -= consumed;
        }
        else
        if ( isupper( typeCode ) )
        {
          // An entry which has its size before contents, unless it's the last one.
          // The entry kind is decided by the type from the sequence: the data
          // itself carries no type byte in this mode.

          if ( !entrySizeKnown )
          {
            if ( size < sizeof( uint32_t ) )
            {
              gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n", headword.c_str(), getName().c_str() );
              break;
            }

            memcpy( &entrySize, ptr, sizeof( uint32_t ) );

            entrySize = ntohl( entrySize );

            ptr += sizeof( uint32_t );
            size -= sizeof( uint32_t );
          }

          if ( size < entrySize )
          {
            gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n", headword.c_str(), getName().c_str() );
            break;
          }

          articleText += handleResource( type, ptr, entrySize );

          ptr += entrySize;
          size -= entrySize;
        }
        else
        {
          gdWarning( "Stardict: non-alpha entry type 0x%x for the word %s encountered in \"%s\".\n",
                     (unsigned) typeCode, headword.c_str(), getName().c_str() );
          break;
        }
      }
    }
    else
    {
      // The sequence is stored in each article separately

      // Size of the type byte plus the 32-bit length that precede the
      // contents of a sized entry.
      uint32_t const sizedHeader = sizeof( uint32_t ) + 1;

      while( size )
      {
        unsigned char const typeCode = static_cast< unsigned char >( *ptr );

        if ( islower( typeCode ) )
        {
          // Zero-terminated entry

          size_t len = strlen( ptr + 1 );

          if ( size < len + 2 )
          {
            gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n", headword.c_str(), getName().c_str() );
            break;
          }

          articleText += handleResource( *ptr, ptr + 1, len );

          ptr += len + 2;
          size -= len + 2;
        }
        else
        if ( isupper( typeCode ) )
        {
          // An entry which has its size before contents

          if ( size < sizedHeader )
          {
            gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n", headword.c_str(), getName().c_str() );
            break;
          }

          uint32_t entrySize;

          memcpy( &entrySize, ptr + 1, sizeof( uint32_t ) );

          entrySize = ntohl( entrySize );

          // Compare against the bytes left after the header so that a huge
          // entrySize cannot overflow the sum on platforms with 32-bit size_t.
          if ( size - sizedHeader < entrySize )
          {
            gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n", headword.c_str(), getName().c_str() );
            break;
          }

          articleText += handleResource( *ptr, ptr + sizedHeader, entrySize );

          ptr += sizedHeader + entrySize;
          size -= sizedHeader + entrySize;
        }
        else
        {
          gdWarning( "Stardict: non-alpha entry type 0x%x for the word %s encountered in \"%s\".\n",
                     (unsigned) typeCode, headword.c_str(), getName().c_str() );
          break;
        }
      }
    }
  }
  catch( ... )
  {
    // Don't leak the article buffer if converting an entry throws.
    free( articleBody );
    throw;
  }

  free( articleBody );
}
QString const& StardictDictionary::getDescription()
{
if( !dictionaryDescription.isEmpty() )
return dictionaryDescription;
File::Class ifoFile( getDictionaryFilenames()[ 0 ], "r" );
Ifo ifo( ifoFile );
if( !ifo.copyright.empty() )
{
QString copyright = QString::fromUtf8( ifo.copyright.c_str() )
.replace( "<br>", "\n", Qt::CaseInsensitive );
dictionaryDescription += QString( QObject::tr( "Copyright: %1%2" ) )
.arg( copyright )
.arg( "\n\n" );
}
if( !ifo.author.empty() )
{
QString author = QString::fromUtf8( ifo.author.c_str() );
dictionaryDescription += QString( QObject::tr( "Author: %1%2" ) )
.arg( author )
.arg( "\n\n" );
}
if( !ifo.email.empty() )
{
QString email = QString::fromUtf8( ifo.email.c_str() );
dictionaryDescription += QString( QObject::tr( "E-mail: %1%2" ) )
.arg( email )
.arg( "\n\n" );
}
if( !ifo.website.empty() )
{
QString website = QString::fromUtf8( ifo.website.c_str() );
dictionaryDescription += QString( QObject::tr( "Website: %1%2" ) )
.arg( website )
.arg( "\n\n" );
}
if( !ifo.date.empty() )
{
QString date = QString::fromUtf8( ifo.date.c_str() );
dictionaryDescription += QString( QObject::tr( "Date: %1%2" ) )
.arg( date )
.arg( "\n\n" );
}
if( !ifo.description.empty() )
{
QString desc = QString::fromUtf8( ifo.description.c_str() );
desc.replace( "\t", "<br/>" );
desc.replace( "\\n", "<br/>" );
desc.replace( "<br>", "<br/>", Qt::CaseInsensitive );
dictionaryDescription += Html::unescape( desc, true );
}
if( dictionaryDescription.isEmpty() )
dictionaryDescription = "NONE";
return dictionaryDescription;
}
/// Returns the first of the dictionary's file names, decoded with
/// FsEncoding::decode(). Elsewhere in this file that first entry is the one
/// opened as the .ifo file.
QString StardictDictionary::getMainFilename()
{
return FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() );
}
/// Builds the full-text search index for this dictionary if it is missing.
///
/// @param isCancelled     Cancellation flag forwarded to
///                        FtsHelpers::makeFTSIndex().
/// @param firstIteration  When true, dictionaries with more articles than
///                        FTS::MaxDictionarySizeForFastSearch are skipped.
///
/// A failed build is logged and the partially written index file is removed;
/// no exception derived from std::exception leaves this function.
void StardictDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration )
{
  // An index that neither needs rebuilding nor is old/bad counts as completed.
  bool const indexOutdated =
    Dictionary::needToRebuildIndex( getDictionaryFilenames(), ftsIdxName )
    || FtsHelpers::ftsIndexIsOldOrBad( ftsIdxName, this );

  if( !indexOutdated )
    FTS_index_completed.ref();

  // Nothing to do when the index exists, or when initialisation reported
  // an error string.
  if( haveFTSIndex() || ensureInitDone().size() )
    return;

  if( firstIteration )
  {
    if( getArticleCount() > FTS::MaxDictionarySizeForFastSearch )
      return;
  }

  gdDebug( "Stardict: Building the full-text index for dictionary: %s\n",
           getName().c_str() );

  try
  {
    FtsHelpers::makeFTSIndex( this, isCancelled );
    FTS_index_completed.ref();
  }
  catch( std::exception &ex )
  {
    gdWarning( "Stardict: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
    QFile::remove( FsEncoding::decode( ftsIdxName.c_str() ) );
  }
}
void StardictDictionary::getArticleText( uint32_t articleAddress, QString & headword, QString & text )
{
try
{
string headwordStr, articleStr;
loadArticle( articleAddress, headwordStr, articleStr );
headword = QString::fromUtf8( headwordStr.data(), headwordStr.size() );
wstring wstr = Utf8::decode( articleStr );
text = Html::unescape( gd::toQString( wstr ) );
}
catch( std::exception &ex )
{
gdWarning( "Stardict: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
}
}
/// Creates a full-text search request for this dictionary.
/// All arguments are forwarded unchanged to FtsHelpers::FTSResultsRequest.
sptr< Dictionary::DataRequest > StardictDictionary::getSearchResults( QString const & searchString,
int searchMode, bool matchCase,
int distanceBetweenWords,
int maxResults,
bool ignoreWordsOrder,
bool ignoreDiacritics )
{
return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics );
}
/// StardictDictionary::findHeadwordsForSynonym()
class StardictHeadwordsRequest;
// Thread-pool task that executes a StardictHeadwordsRequest.
// It only holds references: the request it runs and the semaphore it
// releases from its destructor. The request's destructor acquires that
// semaphore, so the request waits for this runnable to be destroyed.
class StardictHeadwordsRequestRunnable: public QRunnable
{
StardictHeadwordsRequest & r; // Request whose run() is invoked
QSemaphore & hasExited; // Released when this runnable is destroyed
public:
StardictHeadwordsRequestRunnable( StardictHeadwordsRequest & r_,
QSemaphore & hasExited_ ): r( r_ ),
hasExited( hasExited_ )
{}
// Signals that the runnable is gone and no longer references the request.
~StardictHeadwordsRequestRunnable()
{
hasExited.release();
}
virtual void run();
};
// Asynchronous request that collects the headwords of the articles found for
// 'word' whose headword differs from it (i.e. 'word' is their synonym).
// The constructor queues a StardictHeadwordsRequestRunnable on the global
// thread pool; the search itself is implemented in run().
class StardictHeadwordsRequest: public Dictionary::WordSearchRequest
{
friend class StardictHeadwordsRequestRunnable;
wstring word; // Word to look up
StardictDictionary & dict; // Dictionary being searched
QAtomicInt isCancelled; // Non-zero once cancel() or the destructor ran
QSemaphore hasExited; // Released by the runnable's destructor
public:
StardictHeadwordsRequest( wstring const & word_,
StardictDictionary & dict_ ):
word( word_ ), dict( dict_ )
{
// NOTE(review): presumably the pool deletes the runnable after running it
// (QRunnable auto-delete), which is what releases hasExited -- confirm.
QThreadPool::globalInstance()->start(
new StardictHeadwordsRequestRunnable( *this, hasExited ) );
}
void run(); // Run from another thread by StardictHeadwordsRequestRunnable
// Asks the worker to stop; run() polls the flag between articles.
virtual void cancel()
{
isCancelled.ref();
}
// Requests cancellation and then blocks until the runnable has been
// destroyed, so the worker never touches a dead request.
~StardictHeadwordsRequest()
{
isCancelled.ref();
hasExited.acquire();
}
};
// Thread-pool entry point: delegates to the request's own run().
void StardictHeadwordsRequestRunnable::run()
{
r.run();
}
void StardictHeadwordsRequest::run()
{
if ( Qt4x5::AtomicInt::loadAcquire( isCancelled ) )
{
finish();
return;
}
try
{
vector< WordArticleLink > chain = dict.findArticles( word );
wstring caseFolded = Folding::applySimpleCaseOnly( word );
for( unsigned x = 0; x < chain.size(); ++x )
{
if ( Qt4x5::AtomicInt::loadAcquire( isCancelled ) )
{
finish();
return;
}
string headword, articleText;
dict.loadArticle( chain[ x ].articleOffset,
headword, articleText );
wstring headwordDecoded = Utf8::decode( headword );
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) )
{
// The headword seems to differ from the input word, which makes the
// input word its synonym.
Mutex::Lock _( dataMutex );
matches.push_back( headwordDecoded );
}
}
}
catch( std::exception & e )
{
setErrorString( QString::fromUtf8( e.what() ) );
}
finish();
}
/// Starts a synonym lookup for the given word.
/// When synonymSearchEnabled is set, a StardictHeadwordsRequest is created
/// (it begins working in the background from its constructor); otherwise the
/// call is passed on to Class::findHeadwordsForSynonym().
sptr< Dictionary::WordSearchRequest >
StardictDictionary::findHeadwordsForSynonym( wstring const & word )
THROW_SPEC( std::exception )
{
return synonymSearchEnabled ? new StardictHeadwordsRequest( word, *this ) :
Class::findHeadwordsForSynonym( word );
}
/// StardictDictionary::getArticle()
class StardictArticleRequest;
// Thread-pool task that executes a StardictArticleRequest.
// It only holds references: the request it runs and the semaphore it
// releases from its destructor. The request's destructor acquires that
// semaphore, so the request waits for this runnable to be destroyed.
class StardictArticleRequestRunnable: public QRunnable
{
StardictArticleRequest & r; // Request whose run() is invoked
QSemaphore & hasExited; // Released when this runnable is destroyed
public:
StardictArticleRequestRunnable( StardictArticleRequest & r_,
QSemaphore & hasExited_ ): r( r_ ),
hasExited( hasExited_ )
{}
// Signals that the runnable is gone and no longer references the request.
~StardictArticleRequestRunnable()
{
hasExited.release();
}
virtual void run();
};
// Asynchronous request that assembles the HTML for all articles matching a
// word and its alternate forms. The constructor queues a
// StardictArticleRequestRunnable on the global thread pool; the work itself
// is implemented in run().
class StardictArticleRequest: public Dictionary::DataRequest
{
friend class StardictArticleRequestRunnable;
wstring word; // Main word to look up
vector< wstring > alts; // Alternate forms, each looked up as well
StardictDictionary & dict; // Dictionary being searched
bool ignoreDiacritics; // Passed to findArticles() and applied when comparing headwords
QAtomicInt isCancelled; // Non-zero once cancel() or the destructor ran
QSemaphore hasExited; // Released by the runnable's destructor
public:
StardictArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
StardictDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ), alts( alts_ ), dict( dict_ ), ignoreDiacritics( ignoreDiacritics_ )
{
// NOTE(review): presumably the pool deletes the runnable after running it
// (QRunnable auto-delete), which is what releases hasExited -- confirm.
QThreadPool::globalInstance()->start(
new StardictArticleRequestRunnable( *this, hasExited ) );
}
void run(); // Run from another thread by StardictArticleRequestRunnable
// Asks the worker to stop; run() polls the flag between articles.
virtual void cancel()
{
isCancelled.ref();
}
// Requests cancellation and then blocks until the runnable has been
// destroyed, so the worker never touches a dead request.
~StardictArticleRequest()
{
isCancelled.ref();
hasExited.acquire();
}
};
// Thread-pool entry point: delegates to the request's own run().
void StardictArticleRequestRunnable::run()
{
r.run();
}
void StardictArticleRequest::run()
{
if ( Qt4x5::AtomicInt::loadAcquire( isCancelled ) )
{
finish();
return;
}
try
{
vector< WordArticleLink > chain = dict.findArticles( word, ignoreDiacritics );
for( unsigned x = 0; x < alts.size(); ++x )
{
/// Make an additional query for each alt
vector< WordArticleLink > altChain = dict.findArticles( alts[ x ], ignoreDiacritics );
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
if( ignoreDiacritics )
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
for( unsigned x = 0; x < chain.size(); ++x )
{
if ( Qt4x5::AtomicInt::loadAcquire( isCancelled ) )
{
finish();
return;
}
if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() )
continue; // We already have this article in the body.
// Now grab that article
string headword, articleText;
dict.loadArticle( chain[ x ].articleOffset, headword, articleText );
// Ok. Now, does it go to main articles, or to alternate ones? We list
// main ones first, and alternates after.
// We do the case-folded comparison here.
wstring headwordStripped =
Folding::applySimpleCaseOnly( Utf8::decode( headword ) );
if( ignoreDiacritics )
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
multimap< wstring, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ?
mainArticles : alternateArticles;
mapToUse.insert( pair< wstring, pair< string, string > >(
Folding::applySimpleCaseOnly( Utf8::decode( headword ) ),
pair< string, string >( headword, articleText ) ) );
articlesIncluded.insert( chain[ x ].articleOffset );
}
if ( mainArticles.empty() && alternateArticles.empty() )
{
// No such word
finish();
return;
}
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
"</font>""</font>""</font>""</font>""</font>""</font>"
"</b></b></b></b></b></b></b></b>"
"</i></i></i></i></i></i></i></i>";
for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
{
result += dict.isFromLanguageRTL() ? "<h3 class=\"sdct_headwords\" dir=\"rtl\">" : "<h3 class=\"sdct_headwords\">";
result += i->second.first;
result += "</h3>";
if( dict.isToLanguageRTL() )
result += "<div style=\"display:inline;\" dir=\"rtl\">";
result += i->second.second;
result += cleaner;
if( dict.isToLanguageRTL() )
result += "</div>";
}
for( i = alternateArticles.begin(); i != alternateArticles.end(); ++i )
{
result += dict.isFromLanguageRTL() ? "<h3 class=\"sdct_headwords\" dir=\"rtl\">" : "<h3 class=\"sdct_headwords\">";
result += i->second.first;
result += "</h3>";
if( dict.isToLanguageRTL() )
result += "<div style=\"display:inline;\" dir=\"rtl\">";
result += i->second.second;
result += cleaner;
if( dict.isToLanguageRTL() )
result += "</div>";
}
Mutex::Lock _( dataMutex );
data.resize( result.size() );
memcpy( &data.front(), result.data(), result.size() );
hasAnyData = true;
}
catch( std::exception & e )
{
setErrorString( QString::fromUtf8( e.what() ) );
}
finish();
}
/// Starts an asynchronous article request for the word and its alternate
/// forms. The third (unnamed) argument is not used by this implementation.
/// The returned StardictArticleRequest begins working in the background from
/// its constructor.
sptr< Dictionary::DataRequest > StardictDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
bool ignoreDiacritics )
THROW_SPEC( std::exception )
{
return new StardictArticleRequest( word, alts, *this, ignoreDiacritics );
}
/// Checks whether 'str' starts with 'substr'.
/// Returns a pointer to the first character of 'str' after the prefix when it
/// does, and a null pointer otherwise.
static char const * beginsWith( char const * substr, char const * str )
{
  size_t const prefixLen = strlen( substr );

  if ( strncmp( str, substr, prefixLen ) != 0 )
    return 0;

  return str + prefixLen;
}
/// Parses a StarDict .ifo file.
///
/// The first line must be the "StarDict's dict ifo file" magic and the second
/// must start with "version="; otherwise exNotAnIfoFile is thrown. The
/// remaining lines are scanned for the known "key=value" options; unknown
/// lines are ignored. A numeric option that cannot be parsed (or an
/// idxoffsetbits other than 32 or 64) raises exBadFieldInIfo.
///
/// Counters default to 0 and idxoffsetbits to 32 when absent from the file.
Ifo::Ifo( File::Class & f ):
  wordcount( 0 ), synwordcount( 0 ), idxfilesize( 0 ), idxoffsetbits( 32 )
{
  static string const versionEq( "version=" );

  // Both header lines are consumed here, magic first and then the version.
  if ( QString::fromUtf8( f.gets().c_str() ) != "StarDict's dict ifo file" ||
       f.gets().compare( 0, versionEq.size(), versionEq ) )
    throw exNotAnIfoFile();

  /// Now go through the file and parse options

  try
  {
    char option[ 16384 ];

    while( f.gets( option, sizeof( option ), true ) )
    {
      if ( char const * val = beginsWith( "bookname=", option ) )
        bookname = val;
      else
      if ( char const * val = beginsWith( "wordcount=", option ) )
      {
        if ( sscanf( val, "%u", & wordcount ) != 1 )
          throw exBadFieldInIfo( option );
      }
      else
      if ( char const * val = beginsWith( "synwordcount=", option ) )
      {
        if ( sscanf( val, "%u", & synwordcount ) != 1 )
          throw exBadFieldInIfo( option );
      }
      else
      if ( char const * val = beginsWith( "idxfilesize=", option ) )
      {
        if ( sscanf( val, "%u", & idxfilesize ) != 1 )
          throw exBadFieldInIfo( option );
      }
      else
      if ( char const * val = beginsWith( "idxoffsetbits=", option ) )
      {
        if ( sscanf( val, "%u", & idxoffsetbits ) != 1 || ( idxoffsetbits != 32
             && idxoffsetbits != 64 ) )
          throw exBadFieldInIfo( option );
      }
      else
      if ( char const * val = beginsWith( "sametypesequence=", option ) )
        sametypesequence = val;
      else
      if ( char const * val = beginsWith( "dicttype=", option ) )
        dicttype = val;
      else
      if ( char const * val = beginsWith( "description=", option ) )
        description = val;
      else
      if ( char const * val = beginsWith( "copyright=", option ) )
        copyright = val;
      else
      if ( char const * val = beginsWith( "author=", option ) )
        author = val;
      else
      if ( char const * val = beginsWith( "email=", option ) )
        email = val;
      else
      if ( char const * val = beginsWith( "website=", option ) )
        website = val;
      else
      if ( char const * val = beginsWith( "date=", option ) )
        date = val;
    }
  }
  catch( File::exReadError & )
  {
    // Deliberately ignored: a read error while scanning the options simply
    // ends the scan, keeping whatever was parsed so far.
  }
}
//// StardictDictionary::getResource()
class StardictResourceRequest;
// Thread-pool task that executes a StardictResourceRequest.
// It only holds references: the request it runs and the semaphore it
// releases from its destructor. The request's destructor acquires that
// semaphore, so the request waits for this runnable to be destroyed.
class StardictResourceRequestRunnable: public QRunnable
{
StardictResourceRequest & r; // Request whose run() is invoked
QSemaphore & hasExited; // Released when this runnable is destroyed
public:
StardictResourceRequestRunnable( StardictResourceRequest & r_,
QSemaphore & hasExited_ ): r( r_ ),
hasExited( hasExited_ )
{}
// Signals that the runnable is gone and no longer references the request.
~StardictResourceRequestRunnable()
{
hasExited.release();
}
virtual void run();
};
// Asynchronous request that loads one named resource of a dictionary.
// The constructor queues a StardictResourceRequestRunnable on the global
// thread pool; the loading itself is implemented in run().
class StardictResourceRequest: public Dictionary::DataRequest
{
friend class StardictResourceRequestRunnable;
StardictDictionary & dict; // Dictionary the resource belongs to
string resourceName; // Name of the resource; run() strips marker bytes from it
QAtomicInt isCancelled; // Non-zero once cancel() or the destructor ran
QSemaphore hasExited; // Released by the runnable's destructor
public:
StardictResourceRequest( StardictDictionary & dict_,
string const & resourceName_ ):
dict( dict_ ),
resourceName( resourceName_ )
{
// NOTE(review): presumably the pool deletes the runnable after running it
// (QRunnable auto-delete), which is what releases hasExited -- confirm.
QThreadPool::globalInstance()->start(
new StardictResourceRequestRunnable( *this, hasExited ) );
}
void run(); // Run from another thread by StardictResourceRequestRunnable
// Asks the worker to stop; run() checks the flag before it starts working.
virtual void cancel()
{
isCancelled.ref();
}
// Requests cancellation and then blocks until the runnable has been
// destroyed, so the worker never touches a dead request.
~StardictResourceRequest()
{
isCancelled.ref();
hasExited.acquire();
}
};
// Thread-pool entry point: delegates to the request's own run().
void StardictResourceRequestRunnable::run()
{
r.run();
}
void StardictResourceRequest::run()
{
// Some runnables linger enough that they are cancelled before they start
if ( Qt4x5::AtomicInt::loadAcquire( isCancelled ) )
{
finish();
return;
}
try
{
if( resourceName.at( 0 ) == '\x1E' )
resourceName = resourceName.erase( 0, 1 );
if( resourceName.at( resourceName.length() - 1 ) == '\x1F' )
resourceName.erase( resourceName.length() - 1, 1 );
string n =
FsEncoding::dirname( dict.getDictionaryFilenames()[ 0 ] ) +
FsEncoding::separator() +
"res" +
FsEncoding::separator() +
FsEncoding::encode( resourceName );
GD_DPRINTF( "n is %s\n", n.c_str() );
try
{
Mutex::Lock _( dataMutex );
File::loadFromFile( n, data );
}
catch( File::exCantOpen & )
{
// Try reading from zip file
if ( dict.resourceZip.isOpen() )
{
Mutex::Lock _( dict.resourceZipMutex );
Mutex::Lock __( dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) )
throw; // Make it fail since we couldn't read the archive
}
else
throw;
}
if ( Filetype::isNameOfTiff( resourceName ) )
{
// Convert it
dataMutex.lock();
QImage img = QImage::fromData( (unsigned char *) &data.front(),
data.size() );
#ifdef MAKE_EXTRA_TIFF_HANDLER
if( img.isNull() )
GdTiff::tiffToQImage( &data.front(), data.size(), img );
#endif
dataMutex.unlock();
if ( !img.isNull() )
{
// Managed to load -- now store it back as BMP
QByteArray ba;
QBuffer buffer( &ba );
buffer.open( QIODevice::WriteOnly );
img.save( &buffer, "BMP" );
Mutex::Lock _( dataMutex );
data.resize( buffer.size() );
memcpy( &data.front(), buffer.data(), data.size() );
}
}
if( Filetype::isNameOfCSS( resourceName ) )
{
Mutex::Lock _( dataMutex );
QString css = QString::fromUtf8( data.data(), data.size() );
// Correct some url's
QString id = QString::fromUtf8( dict.getId().c_str() );
int pos = 0;
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
QRegularExpression links( "url\\(\\s*(['\"]?)([^'\"]*)(['\"]?)\\s*\\)",
QRegularExpression::CaseInsensitiveOption );
QString newCSS;
QRegularExpressionMatchIterator it = links.globalMatch( css );
while( it.hasNext() )
{
QRegularExpressionMatch match = it.next();
newCSS += css.midRef( pos, match.capturedStart() - pos );
pos = match.capturedEnd();
QString url = match.captured( 2 );
if( url.indexOf( ":/" ) >= 0 || url.indexOf( "data:" ) >= 0)
{
// External link
newCSS += match.captured();
continue;
}
QString newUrl = QString( "url(" ) + match.captured( 1 ) + "bres://"
+ id + "/" + url + match.captured( 3 ) + ")";
newCSS += newUrl;
}
if( pos )
{
newCSS += css.midRef( pos );
css = newCSS;
newCSS.clear();
}
#else
QRegExp links( "url\\(\\s*(['\"]?)([^'\"]*)(['\"]?)\\s*\\)", Qt::CaseInsensitive, QRegExp::RegExp );
for( ; ; )
{
pos = links.indexIn( css, pos );
if( pos < 0 )
break;
QString url = links.cap( 2 );
if( url.indexOf( ":/" ) >= 0 || url.indexOf( "data:" ) >= 0)
{
// External link
pos += links.cap().size();
continue;
}
QString newUrl = QString( "url(" ) + links.cap( 1 ) + "bres://"
+ id + "/" + url + links.cap( 3 ) + ")";
css.replace( pos, links.cap().size(), newUrl );
pos += newUrl.size();
}
#endif
dict.isolateCSS( css );
QByteArray bytes = css.toUtf8();
data.resize( bytes.size() );
memcpy( &data.front(), bytes.constData(), bytes.size() );
}
hasAnyData = true;
}
catch( std::exception &ex )
{
gdWarning( "Stardict: Failed loading resource \"%s\" for \"%s\", reason: %s\n",
resourceName.c_str(), dict.getName().c_str(), ex.what() );
// Resource not loaded -- we don't set the hasAnyData flag then
}
catch( ... )
{
}
finish();
}
/// Starts an asynchronous request for the named resource of this dictionary.
/// The returned StardictResourceRequest begins working in the background from
/// its constructor.
sptr< Dictionary::DataRequest > StardictDictionary::getResource( string const & name )
THROW_SPEC( std::exception )
{
return new StardictResourceRequest( *this, name );
}
} // anonymous namespace
static void findCorrespondingFiles( string const & ifo,
string & idx, string & dict, string & syn )
{
string base( ifo, 0, ifo.size() - 3 );
if ( !(
File::tryPossibleName( base + "idx", idx ) ||
File::tryPossibleName( base + "idx.gz", idx ) ||
File::tryPossibleName( base + "idx.dz", idx ) ||
File::tryPossibleName( base + "IDX", idx ) ||
File::tryPossibleName( base + "IDX.GZ", idx ) ||
File::tryPossibleName( base + "IDX.DZ", idx )
) )
throw exNoIdxFile( ifo );
if ( !(
File::tryPossibleName( base + "dict", dict ) ||
File::tryPossibleName( base + "dict.dz", dict ) ||
File::tryPossibleName( base + "DICT", dict ) ||
File::tryPossibleName( base + "dict.DZ", dict )
) )
throw exNoDictFile( ifo );
if ( !(
File::tryPossibleName( base + "syn", syn ) ||
File::tryPossibleName( base + "syn.gz", syn ) ||
File::tryPossibleName( base + "syn.dz", syn ) ||
File::tryPossibleName( base + "SYN", syn ) ||
File::tryPossibleName( base + "SYN.GZ", syn ) ||
File::tryPossibleName( base + "SYN.DZ", syn )
) )
syn.clear();
}
/// Reads a StarDict .idx or .syn file and feeds its entries to the index.
///
/// @param fileName        File to read; it is opened with gd_gzopen().
/// @param indexedWords    Receives one entry per word read.
/// @param chunks          Chunked storage that receives, for every .idx
///                        entry, the article offset, the article size and the
///                        headword.
/// @param articleOffsets  For an .idx file: optional list that receives the
///                        chunk offset of every entry, in file order. For a
///                        .syn file: the list produced by the .idx pass, used
///                        to translate entry numbers into chunk offsets.
/// @param isSynFile       True when 'fileName' is a .syn file.
/// @param parseHeadwords  Selects addWord() (true) or addSingleWord() (false)
///                        for inserting words into 'indexedWords'.
///
/// Throws exCantReadFile when the file cannot be opened or read, and
/// exIncorrectOffset when a .syn entry refers to a non-existent .idx entry.
static void handleIdxSynFile( string const & fileName,
                              IndexedWords & indexedWords,
                              ChunkedStorage::Writer & chunks,
                              vector< uint32_t > * articleOffsets,
                              bool isSynFile, bool parseHeadwords )
{
  gzFile stardictIdx = gd_gzopen( fileName.c_str() );
  if ( !stardictIdx )
    throw exCantReadFile( fileName );

  vector< char > image;

  {
    // Closes the file when the reading scope is left, including when
    // growing the buffer or a read error throws.
    struct GzCloser
    {
      gzFile file;

      explicit GzCloser( gzFile file_ ): file( file_ )
      {}

      ~GzCloser()
      {
        gzclose( file );
      }
    } closer( stardictIdx );

    for( ; ; )
    {
      size_t oldSize = image.size();

      image.resize( oldSize + 65536 );

      int rd = gzread( stardictIdx, &image.front() + oldSize, 65536 );

      if ( rd < 0 )
        throw exCantReadFile( fileName );

      if ( rd != 65536 )
      {
        // Short read: keep what was read plus one spare byte for the
        // terminator written below.
        image.resize( oldSize + rd + 1 );
        break;
      }
    }
  }

  // We append one zero byte to catch runaway string at the end, if any

  image.back() = 0;

  // Now parse it

  for( char const * ptr = &image.front(); ptr != &image.back(); )
  {
    size_t wordLen = strlen( ptr );

    if ( ptr + wordLen + 1 + ( isSynFile ? sizeof( uint32_t ) :
                                           sizeof( uint32_t ) * 2 ) >
         &image.back() )
    {
      GD_FDPRINTF( stderr, "Warning: sudden end of file %s\n", fileName.c_str() );
      break;
    }

    char const * word = ptr;

    ptr += wordLen + 1;

    uint32_t offset;

    if( strstr( word, "&#" ) )
    {
      // Decode some html-coded symbols in headword.
      // The result is written back in place, never beyond the original
      // word length; 'ptr' has already been advanced past the original word.
      string unescapedWord = Html::unescapeUtf8( word );
      strncpy( (char *)word, unescapedWord.c_str(), wordLen );
      wordLen = strlen( word );
    }

    if ( !isSynFile )
    {
      // We're processing the .idx file
      uint32_t articleOffset, articleSize;

      memcpy( &articleOffset, ptr, sizeof( uint32_t ) );
      ptr += sizeof( uint32_t );
      memcpy( &articleSize, ptr, sizeof( uint32_t ) );
      ptr += sizeof( uint32_t );

      articleOffset = ntohl( articleOffset );
      articleSize = ntohl( articleSize );

      // Create an entry for the article in the chunked storage

      offset = chunks.startNewBlock();

      if ( articleOffsets )
        articleOffsets->push_back( offset );

      chunks.addToBlock( &articleOffset, sizeof( uint32_t ) );
      chunks.addToBlock( &articleSize, sizeof( uint32_t ) );
      chunks.addToBlock( word, wordLen + 1 );
    }
    else
    {
      // We're processing the .syn file
      uint32_t offsetInIndex;

      memcpy( &offsetInIndex, ptr, sizeof( uint32_t ) );
      ptr += sizeof( uint32_t );

      offsetInIndex = ntohl( offsetInIndex );

      // Without the list of .idx offsets no synonym can be resolved, so a
      // missing list is treated like an out-of-range entry number.
      if ( !articleOffsets || offsetInIndex >= articleOffsets->size() )
        throw exIncorrectOffset( fileName );

      offset = (*articleOffsets)[ offsetInIndex ];

      // Some StarDict dictionaries are in fact badly converted Babylon ones.
      // They contain a lot of superfluous slashed entries with dollar signs.
      // We try to filter them out here, since those entries become much more
      // apparent in GoldenDict than they were in StarDict because of
      // punctuation folding. Hopefully there are not a whole lot of valid
      // synonyms which really start from slash and contain dollar signs, or
      // end with dollar and contain slashes.
      if ( *word == '/' )
      {
        if ( strchr( word, '$' ) )
          continue; // Skip this entry
      }
      else
      if ( wordLen && word[ wordLen - 1 ] == '$' )
      {
        if ( strchr( word, '/' ) )
          continue; // Skip this entry
      }
    }

    // Insert new entry into an index

    if( parseHeadwords )
      indexedWords.addWord( Utf8::decode( word ), offset );
    else
      indexedWords.addSingleWord( Utf8::decode( word ), offset );
  }

  GD_DPRINTF( "%u entires made\n", (unsigned) indexedWords.size() );
}
vector< sptr< Dictionary::Class > > makeDictionaries(
vector< string > const & fileNames,
string const & indicesDir,
Dictionary::Initializing & initializing,
unsigned maxHeadwordsToExpand )
THROW_SPEC( std::exception )
{
vector< sptr< Dictionary::Class > > dictionaries;
for( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end();
++i )
{
if ( i->size() < 4 ||
strcasecmp( i->c_str() + ( i->size() - 4 ), ".ifo" ) != 0 )
continue;
try
{
vector< string > dictFiles( 1, *i );
string idxFileName, dictFileName, synFileName;
findCorrespondingFiles( *i, idxFileName, dictFileName, synFileName );
dictFiles.push_back( idxFileName );
dictFiles.push_back( dictFileName );
if ( synFileName.size() )
dictFiles.push_back( synFileName );
// See if there's a zip file with resources present. If so, include it.
string zipFileName;
string baseName = FsEncoding::dirname( idxFileName ) + FsEncoding::separator();
if ( File::tryPossibleZipName( baseName + "res.zip", zipFileName ) ||
File::tryPossibleZipName( baseName + "RES.ZIP", zipFileName ) ||
File::tryPossibleZipName( baseName + "res" + FsEncoding::separator() + "res.zip", zipFileName ) )
dictFiles.push_back( zipFileName );
string dictId = Dictionary::makeDictionaryId( dictFiles );
string indexFile = indicesDir + dictId;
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
indexIsOldOrBad( indexFile ) )
{
// Building the index
File::Class ifoFile( *i, "r" );
Ifo ifo( ifoFile );
gdDebug( "Stardict: Building the index for dictionary: %s\n", ifo.bookname.c_str() );
if ( ifo.idxoffsetbits == 64 )
throw ex64BitsNotSupported();
if ( ifo.dicttype.size() )
throw exDicttypeNotSupported();
if( synFileName.empty() )
{
if ( ifo.synwordcount )
{
GD_DPRINTF( "Warning: dictionary has synwordcount specified, but no "
"corresponding .syn file was found\n" );
ifo.synwordcount = 0; // Pretend it wasn't there
}
}
else
if ( !ifo.synwordcount )
{
GD_DPRINTF( "Warning: ignoring .syn file %s, since there's no synwordcount in .ifo specified\n",
synFileName.c_str() );
}
GD_DPRINTF( "bookname = %s\n", ifo.bookname.c_str() );
GD_DPRINTF( "wordcount = %u\n", ifo.wordcount );
initializing.indexingDictionary( ifo.bookname );
File::Class idx( indexFile, "wb" );
IdxHeader idxHeader;
memset( &idxHeader, 0, sizeof( idxHeader ) );
// We write a dummy header first. At the end of the process the header
// will be rewritten with the right values.
idx.write( idxHeader );
idx.write( ifo.bookname.data(), ifo.bookname.size() );
idx.write( ifo.sametypesequence.data(), ifo.sametypesequence.size() );
IndexedWords indexedWords;
ChunkedStorage::Writer chunks( idx );
// Load indices
if ( !ifo.synwordcount )
handleIdxSynFile( idxFileName, indexedWords, chunks, 0, false,
!maxHeadwordsToExpand || ifo.wordcount < maxHeadwordsToExpand );
else
{
vector< uint32_t > articleOffsets;
articleOffsets.reserve( ifo.wordcount );
handleIdxSynFile( idxFileName, indexedWords, chunks, &articleOffsets,
false,
!maxHeadwordsToExpand || ( ifo.wordcount + ifo.synwordcount ) < maxHeadwordsToExpand );
handleIdxSynFile( synFileName, indexedWords, chunks, &articleOffsets,
true,
!maxHeadwordsToExpand || ( ifo.wordcount + ifo.synwordcount ) < maxHeadwordsToExpand );
}
// Finish with the chunks
idxHeader.chunksOffset = chunks.finish();
// Build index
IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
idxHeader.indexRootOffset = idxInfo.rootOffset;
// That concludes it. Update the header.
idxHeader.signature = Signature;
idxHeader.formatVersion = CurrentFormatVersion;
idxHeader.wordCount = ifo.wordcount;
idxHeader.synWordCount = ifo.synwordcount;
idxHeader.bookNameSize = ifo.bookname.size();
idxHeader.sameTypeSequenceSize = ifo.sametypesequence.size();
// read languages
QPair<quint32,quint32> langs =
LangCoder::findIdsForFilename( QString::fromStdString( dictFileName ) );
// if no languages found, try dictionary's name
if ( langs.first == 0 || langs.second == 0 )
{
langs =
LangCoder::findIdsForFilename( QString::fromStdString( ifo.bookname ) );
}
idxHeader.langFrom = langs.first;
idxHeader.langTo = langs.second;
// If there was a zip file, index it too
if ( zipFileName.size() )
{
GD_DPRINTF( "Indexing zip file\n" );
idxHeader.hasZipFile = 1;
IndexedWords zipFileNames;
IndexedZip zipFile;
if( zipFile.openZipFile( QDir::fromNativeSeparators(
FsEncoding::decode( zipFileName.c_str() ) ) ) )
zipFile.indexFile( zipFileNames );
if( !zipFileNames.empty() )
{
// Build the resulting zip file index
IndexInfo idxInfo = BtreeIndexing::buildIndex( zipFileNames, idx );
idxHeader.zipIndexBtreeMaxElements = idxInfo.btreeMaxElements;
idxHeader.zipIndexRootOffset = idxInfo.rootOffset;
}
else
{
// Bad zip file -- no index (though the mark that we have one
// remains)
idxHeader.zipIndexBtreeMaxElements = 0;
idxHeader.zipIndexRootOffset = 0;
}
}
else
idxHeader.hasZipFile = 0;
// That concludes it. Update the header.
idx.rewind();
idx.write( &idxHeader, sizeof( idxHeader ) );
}
dictionaries.push_back( new StardictDictionary( dictId,
indexFile,
dictFiles ) );
}
catch( std::exception & e )
{
gdWarning( "Stardict dictionary initializing failed: %s, error: %s\n",
i->c_str(), e.what() );
}
}
return dictionaries;
}
}
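// --- Not part of stardict.cc: leftover text from the Gitee web page footer ---
// Loading...
// Ma Jiancang AI Assistant
// Try more
// Explain code
// Find bugs in code
// Optimize code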