- 前两个字段INDEX_MAGIC_HEADER和INDEX_FORMAT_VERSION分别是magic number和索引版本号
- 第三个字段USE_64BIT表示是否使用64位的document和word id(默认是使用).
- 然后是写入docinfo,这个字段也就是配置中的docinfo字段(index block中)
- 接下来将会写入schema,也就是索引的schema信息,比如当前索引的字段名,当前需要建立的属性名等等.
{
	// schema section: a count followed by one serialized column per entry,
	// first for the full-text fields, then for the attributes

	// full-text fields
	const int iFieldCount = tSchema.m_dFields.GetLength();
	fdInfo.PutDword ( iFieldCount );
	ARRAY_FOREACH ( iField, tSchema.m_dFields )
	{
		WriteSchemaColumn ( fdInfo, tSchema.m_dFields[iField] );
	}

	// attributes
	const int iAttrCount = tSchema.GetAttrsCount();
	fdInfo.PutDword ( iAttrCount );
	for ( int iAttr=0; iAttr<iAttrCount; iAttr++ )
	{
		WriteSchemaColumn ( fdInfo, tSchema.GetAttr ( iAttr ) );
	}
}
- 然后是写入当前索引集的最小doc id(m_uMinDocid)
- 接下来是根据docinfo(也就是属性存储)的配置来选择是否写入行信息(当docinfo为inline的话,表示attribute value 将会存储在spd文件中).
- 然后是写入wordlist的checkpoint.
- 接下来是写入对应的索引配置信息,首先是tokenizer的配置(见下面的SaveTokenizerSettings):
/// Write the tokenizer's settings to the given writer.
///
/// The fields are emitted in a fixed order: type, case folding table,
/// minimum word length, the "synonyms embedded" flag (followed by the
/// synonyms themselves when the flag is set), synonyms file name,
/// ignored chars, n-gram length, n-gram chars, blend chars, blend mode.
///
/// @param tWriter        destination writer
/// @param pTokenizer     tokenizer whose settings are saved; dereferenced unconditionally
/// @param iEmbeddedLimit synonyms are embedded only when the synonyms file
///                       size does not exceed this limit
void SaveTokenizerSettings ( CSphWriter & tWriter, ISphTokenizer * pTokenizer, int iEmbeddedLimit )
{
	const CSphTokenizerSettings & tCfg = pTokenizer->GetSettings ();

	// basic settings
	tWriter.PutByte ( tCfg.m_iType );
	tWriter.PutString ( tCfg.m_sCaseFolding.cstr () );
	tWriter.PutDword ( tCfg.m_iMinWordLen );

	// synonyms: a one-byte flag, then the embedded data if the file is small enough
	const SphOffset_t iSizeLimit = (SphOffset_t)iEmbeddedLimit;
	if ( pTokenizer->GetSynFileInfo ().m_uSize<=iSizeLimit )
	{
		tWriter.PutByte ( 1 );
		pTokenizer->WriteSynonyms ( tWriter );
	} else
	{
		tWriter.PutByte ( 0 );
	}

	// the synonyms file name is written regardless of embedding
	tWriter.PutString ( tCfg.m_sSynonymsFile.cstr () );

	// remaining character-class and n-gram settings
	tWriter.PutString ( tCfg.m_sIgnoreChars.cstr () );
	tWriter.PutDword ( tCfg.m_iNgramLen );
	tWriter.PutString ( tCfg.m_sNgramChars.cstr () );
	tWriter.PutString ( tCfg.m_sBlendChars.cstr () );
	tWriter.PutString ( tCfg.m_sBlendMode.cstr () );
}
- 写入dictionary的配置信息(比如stop word之类).
- 然后是写入killlist的size(m_uKillListSize)
- 写入m_iMinMaxIndex.从下面的代码可以看到,它等于m_iDocinfo(应为文档数)乘以每行的stride(DOCINFO_IDSIZE加上schema的行大小),也就是全部docinfo行所占的总大小,而不是单个document的大小.
// Excerpt (part of the original code is elided below): computes the
// min-max index value that is stored into the header being built.

// Buffer of GetRowSize() row items for the new schema; how it is filled
// and used is in the elided part.
CSphFixedVector<CSphRowitem> dMinRow ( tNewSchema.GetRowSize() );
...............
// Stride of one docinfo entry: DOCINFO_IDSIZE plus the row size of the new schema.
int iNewStride = DOCINFO_IDSIZE + tNewSchema.GetRowSize();
// m_iDocinfo entries of iNewStride each. Presumably m_iDocinfo is the
// document count, which would make this the total size of all docinfo
// rows -- TODO confirm.
// NOTE(review): the multiplication is done in the operands' own types
// before widening to int64_t; confirm m_iDocinfo is 64-bit.
int64_t iNewMinMaxIndex = m_iDocinfo * iNewStride;
// Record the value in the header builder (presumably serialized with the
// rest of the header later -- not shown here).
tBuildHeader.m_iMinMaxIndex = iNewMinMaxIndex;
- 写入regex相关配置(regexp_filter)