From a84ae7842ccffe56335e2ba0e736d18601315716 Mon Sep 17 00:00:00 2001 From: Danny Corluy Date: Fri, 23 Dec 2022 15:48:41 +0100 Subject: [PATCH] SOLR-164 This patch allows Solr to use different schemas for different shards within a collection, based on the configSet defined in core.properties. It was introduced to work around an issue that only occurred when a schema change resulted in incompatible segment merges, e.g. when enabling docValues on the unique key field in new shards without reindexing the data in old shards. Adding/updating documents in the old shards would then break subsequent searches (presumably because the shard then contains a mixture of entries with and without docValues), but only after a segment merge. This patch forces Solr to use the appropriate schema whenever possible: the one defined in core.properties if it exists, or the collection's schema if no specific schema is configured for a given core. --- .../solr/cloud/CloudConfigSetService.java | 16 +- .../java/org/apache/solr/core/ConfigSet.java | 12 +- .../apache/solr/core/ConfigSetService.java | 28 +- .../org/apache/solr/core/CoreDescriptor.java | 5 + .../java/org/apache/solr/core/SolrCore.java | 24 +- .../apache/solr/update/AddUpdateCommand.java | 14 +- .../solr/update/DirectUpdateHandler2.java | 15 +- .../ClassificationUpdateProcessor.java | 2 +- .../conf/schema.xml | 50 ++ .../conf/solrconfig.xml | 51 ++ .../schema-withDocValues.xml | 51 ++ ...tAddDocumentToShardAfterSchemaUpdated.java | 586 ++++++++++++++++++ .../apache/solr/core/TestCodecSupport.java | 2 +- 13 files changed, 828 insertions(+), 28 deletions(-) create mode 100644 solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/conf/schema.xml create mode 100644 solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/conf/solrconfig.xml create mode 100644 solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/schema-withDocValues.xml create mode 100644 solr/core/src/test/org/apache/solr/cloud/TestAddDocumentToShardAfterSchemaUpdated.java diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java b/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java index 9b16d231e02..694955d2250 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java +++ b/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java @@ -39,7 +39,7 @@ public class CloudConfigSetService extends ConfigSetService { } @Override - public SolrResourceLoader createCoreResourceLoader(CoreDescriptor cd) { + public SolrResourceLoader createCoreResourceLoader(CoreDescriptor cd, boolean coreSpecific) { try { // for back compat with cores that can create collections without the collections API if (!zkController.getZkClient().exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + cd.getCollectionName(), true)) { @@ -53,8 +53,18 @@ public class CloudConfigSetService extends ConfigSetService { } String configName = zkController.getZkStateReader().readConfigName(cd.getCollectionName()); - return new ZkSolrResourceLoader(cd.getInstanceDir(), configName, parentLoader.getClassLoader(), - cd.getSubstitutableProperties(), zkController); + if (coreSpecific) { + String coreConfigName = cd.getCollectionConfigSet(); + if (coreConfigName != null && !coreConfigName.equals(configName)) { + return new ZkSolrResourceLoader(cd.getInstanceDir(), coreConfigName, parentLoader.getClassLoader(), + cd.getSubstitutableProperties(), zkController); + } else { + return null; + } + 
} else { + return new ZkSolrResourceLoader(cd.getInstanceDir(), configName, parentLoader.getClassLoader(), + cd.getSubstitutableProperties(), zkController); + } } @Override diff --git a/solr/core/src/java/org/apache/solr/core/ConfigSet.java b/solr/core/src/java/org/apache/solr/core/ConfigSet.java index e0c9fe44def..dbb9dd38427 100644 --- a/solr/core/src/java/org/apache/solr/core/ConfigSet.java +++ b/solr/core/src/java/org/apache/solr/core/ConfigSet.java @@ -30,15 +30,19 @@ public class ConfigSet { private final IndexSchema indexSchema; + // coreSchema is only set when the core ConfigSet differs from the collection ConfigSet + private final IndexSchema coreSchema; + private final NamedList properties; private final boolean trusted; - public ConfigSet(String name, SolrConfig solrConfig, IndexSchema indexSchema, + public ConfigSet(String name, SolrConfig solrConfig, IndexSchema indexSchema, IndexSchema coreSchema, NamedList properties, boolean trusted) { this.name = name; this.solrconfig = solrConfig; this.indexSchema = indexSchema; + this.coreSchema = coreSchema; this.properties = properties; this.trusted = trusted; } @@ -55,6 +59,10 @@ public class ConfigSet { return indexSchema; } + public IndexSchema getCoreSchema() { + return coreSchema; + } + public NamedList getProperties() { return properties; } @@ -62,4 +70,4 @@ public class ConfigSet { public boolean isTrusted() { return trusted; } } diff --git a/solr/core/src/java/org/apache/solr/core/ConfigSetService.java b/solr/core/src/java/org/apache/solr/core/ConfigSetService.java index 69e160b5da1..9ad3fb5982c 100644 --- a/solr/core/src/java/org/apache/solr/core/ConfigSetService.java +++ b/solr/core/src/java/org/apache/solr/core/ConfigSetService.java @@ -72,7 +72,7 @@ public abstract class ConfigSetService { */ public final ConfigSet getConfig(CoreDescriptor dcore) { - SolrResourceLoader coreLoader = createCoreResourceLoader(dcore); + SolrResourceLoader coreLoader = createCoreResourceLoader(dcore, false); try { @@ -90,7 +90,8 @@ public abstract class ConfigSetService { SolrConfig solrConfig = createSolrConfig(dcore, coreLoader); IndexSchema schema = createIndexSchema(dcore, solrConfig); - return new ConfigSet(configName(dcore), solrConfig, schema, properties, trusted); + IndexSchema coreSchema = createCoreSchema(dcore); + return new ConfigSet(configName(dcore), solrConfig, schema, coreSchema, properties, trusted); } catch (Exception e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not load conf for core " + dcore.getName() + @@ -119,6 +120,23 @@ public abstract class ConfigSetService { return IndexSchemaFactory.buildIndexSchema(cd.getSchemaName(), solrConfig); } + /** + * The core schema can differ from the index schema in SolrCloud when the core.properties file has a ConfigSet different from the + * one on the collection. 
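+ * For example (hypothetical ConfigSet names): if the collection was switched to "conf_docvalues" while this core's + * core.properties still names "conf_plain", the schema returned here is built from "conf_plain".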
+ * Creating a core-specific schema can prevent issues with the Lucene index. + * @param cd the core's CoreDescriptor + * @return the core-specific schema, or null when the core uses the collection's ConfigSet + */ + protected IndexSchema createCoreSchema(CoreDescriptor cd) { + SolrResourceLoader coreLoader = createCoreResourceLoader(cd, true); + if (coreLoader != null) { + log.info("Loading different schema for core {}.", cd.getName()); + SolrConfig solrConfig = createSolrConfig(cd, coreLoader); + return IndexSchemaFactory.buildIndexSchema(cd.getSchemaName(), solrConfig); + } else { + return null; + } + } + /** * Return the ConfigSet properties * @param cd the core's CoreDescriptor @@ -144,9 +162,10 @@ /** * Create a SolrResourceLoader for a core * @param cd the core's CoreDescriptor + * @param coreSpecific true to load the core-specific configuration; if it is the same as the collection configuration or does not exist, null will be returned * @return a SolrResourceLoader */ - protected abstract SolrResourceLoader createCoreResourceLoader(CoreDescriptor cd); + protected abstract SolrResourceLoader createCoreResourceLoader(CoreDescriptor cd, boolean coreSpecific); /** * Return a name for the ConfigSet for a core @@ -178,7 +197,8 @@ } @Override - public SolrResourceLoader createCoreResourceLoader(CoreDescriptor cd) { + public SolrResourceLoader createCoreResourceLoader(CoreDescriptor cd, boolean coreSpecific) { + if (coreSpecific) return null; Path instanceDir = locateInstanceDir(cd); return new SolrResourceLoader(instanceDir, parentLoader.getClassLoader(), cd.getSubstitutableProperties()); } diff --git a/solr/core/src/java/org/apache/solr/core/CoreDescriptor.java b/solr/core/src/java/org/apache/solr/core/CoreDescriptor.java index 1747fa2ca12..cd8a7f4ac07 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreDescriptor.java +++ b/solr/core/src/java/org/apache/solr/core/CoreDescriptor.java @@ -35,6 +35,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.commons.lang.StringUtils; import org.apache.solr.cloud.CloudDescriptor; import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.CollectionAdminParams; import org.apache.solr.util.PropertiesUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -386,6 +387,10 @@ public class CoreDescriptor { return coreProperties.getProperty(CORE_CONFIGSET_PROPERTIES); } + public String getCollectionConfigSet() { + return coreProperties.getProperty(CollectionAdminParams.COLL_CONF); + } + public boolean isConfigSetTrusted() { return trustedConfigSet; } diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index b64a6c65a66..580ba7fed87 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -197,6 +197,11 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab private final SolrConfig solrConfig; private final SolrResourceLoader resourceLoader; private volatile IndexSchema schema; + + // The local core schema is present when the schema differs from the index schema of the collection. + // This schema should not be updatable and should be compatible with the schema of the collection. 
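+ // Hypothetical example: after a collection is pointed at a new ConfigSet with docValues enabled, an old core + // whose core.properties still names the original ConfigSet keeps building Lucene documents with this schema, + // while searches continue to use the collection-level schema above.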
+ private final IndexSchema localCoreSchema; + private final NamedList configSetProperties; private final String dataDir; private final String ulogDir; @@ -299,6 +304,14 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab return schema; } + /** + * Alternative core-specific index schema to be used when converting documents to be added to the Lucene index. + * @return the core-specific index schema, or null when the core uses the collection's index schema + */ + public IndexSchema getUpdateSchema() { + return localCoreSchema; + } + /** * Sets the latest schema snapshot to be used by this core instance. * If the specified replacementSchema uses a {@link SimilarityFactory} which is @@ -320,7 +333,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab } this.schema = replacementSchema; } - + public NamedList getConfigSetProperties() { return configSetProperties; } @@ -663,7 +676,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab CoreDescriptor cd = new CoreDescriptor(name, getCoreDescriptor()); cd.loadExtraProperties(); //Reload the extra properties core = new SolrCore(coreContainer, getName(), getDataDir(), coreConfig.getSolrConfig(), - coreConfig.getIndexSchema(), coreConfig.getProperties(), + coreConfig.getIndexSchema(), coreConfig.getCoreSchema(), coreConfig.getProperties(), cd, updateHandler, solrDelPolicy, currentCore, true); // we open a new IndexWriter to pick up the latest config @@ -871,8 +884,8 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab } public SolrCore(CoreContainer coreContainer, CoreDescriptor cd, ConfigSet coreConfig) { - this(coreContainer, cd.getName(), null, coreConfig.getSolrConfig(), coreConfig.getIndexSchema(), coreConfig.getProperties(), - cd, null, null, null, false); + this(coreContainer, cd.getName(), null, coreConfig.getSolrConfig(), coreConfig.getIndexSchema(), coreConfig.getCoreSchema(), + coreConfig.getProperties(), cd, null, null, null, false); } public CoreContainer getCoreContainer() { @@ -894,13 +907,14 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab * @since solr 1.3 */ public SolrCore(CoreContainer coreContainer, String name, String dataDir, SolrConfig config, - IndexSchema schema, NamedList configSetProperties, + IndexSchema schema, IndexSchema localCoreSchema, NamedList configSetProperties, CoreDescriptor coreDescriptor, UpdateHandler updateHandler, IndexDeletionPolicyWrapper delPolicy, SolrCore prev, boolean reload) { assert ObjectReleaseTracker.track(searcherExecutor); // ensure that in unclean shutdown tests we still close this this.coreContainer = coreContainer; + this.localCoreSchema = localCoreSchema; final CountDownLatch latch = new CountDownLatch(1); diff --git a/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java b/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java index cfa937e7cb3..5902facae89 100644 --- a/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java +++ b/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java @@ -92,14 +92,14 @@ public class AddUpdateCommand extends UpdateCommand { /** * Creates and returns a lucene Document to index. - * Nested documents, if found, will cause an exception to be thrown. Call {@link #getLuceneDocsIfNested(IndexSchema)} for that. 
* Any changes made to the returned Document will not be reflected in the SolrInputDocument, or future calls to this * method. * Note that the behavior of this is sensitive to {@link #isInPlaceUpdate()}. */ - public Document getLuceneDocument() { + public Document getLuceneDocument(IndexSchema schema) { final boolean ignoreNestedDocs = false; // throw an exception if found - return DocumentBuilder.toDocument(getSolrInputDocument(), req.getSchema(), isInPlaceUpdate(), ignoreNestedDocs); + return DocumentBuilder.toDocument(getSolrInputDocument(), schema, isInPlaceUpdate(), ignoreNestedDocs); } /** Returns the indexed ID for this document. The returned BytesRef is retained across multiple calls, and should not be modified. */ @@ -177,14 +177,14 @@ public class AddUpdateCommand extends UpdateCommand { /** * Computes the final flattened Solr docs that are ready to be converted to Lucene docs. If no flattening is - * performed then we return null, and the caller ought to use {@link #getLuceneDocument(IndexSchema)} instead. * This should only be called once. * Any changes made to the returned Document(s) will not be reflected in the SolrInputDocument, * or future calls to this method. */ - public Iterable<Document> getLuceneDocsIfNested() { + public Iterable<Document> getLuceneDocsIfNested(IndexSchema schema) { assert ! isInPlaceUpdate() : "We don't expect this to happen."; // but should "work"? - if (!req.getSchema().isUsableForChildDocs()) { + if (!schema.isUsableForChildDocs()) { // note if the doc is nested despite this, we'll throw an exception elsewhere return null; } @@ -205,7 +205,7 @@ public class AddUpdateCommand extends UpdateCommand { // then we could add this field to the generated lucene document instead. 
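+ // note: the schema passed in may now be the core-specific update schema rather than req.getSchema(), + // so the isUsableForChildDocs() check above is made against the schema the core will actually index with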
} - return () -> all.stream().map(sdoc -> DocumentBuilder.toDocument(sdoc, req.getSchema())).iterator(); + return () -> all.stream().map(sdoc -> DocumentBuilder.toDocument(sdoc, schema)).iterator(); } private List<SolrInputDocument> flatten(SolrInputDocument root) { diff --git a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java index f466004a5ff..a5023f415af 100644 --- a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java +++ b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java @@ -319,11 +319,11 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core); try { IndexWriter writer = iw.get(); - Iterable<Document> blockDocs = cmd.getLuceneDocsIfNested(); + Iterable<Document> blockDocs = cmd.getLuceneDocsIfNested(getUpdateSchema(cmd)); if (blockDocs != null) { writer.addDocuments(blockDocs); } else { - writer.addDocument(cmd.getLuceneDocument()); + writer.addDocument(cmd.getLuceneDocument(getUpdateSchema(cmd))); } if (ulog != null) ulog.add(cmd); @@ -935,6 +935,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState assert idField != null; // this code path requires an idField in order to potentially replace a doc boolean hasUpdateTerm = cmd.updateTerm != null; // AKA dedupe + IndexSchema schema = getUpdateSchema(cmd); if (cmd.isInPlaceUpdate()) { if (hasUpdateTerm) { throw new IllegalStateException("cmd.updateTerm/dedupe is not compatible with in-place updates"); } @@ -942,7 +943,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState // we don't support the solrInputDoc with nested child docs either but we'll throw an exception if attempted Term updateTerm = new Term(idField.getName(), cmd.getIndexedId()); - Document luceneDocument = cmd.getLuceneDocument(); + Document luceneDocument = cmd.getLuceneDocument(schema); final List<IndexableField> origDocFields = luceneDocument.getFields(); final List<IndexableField> fieldsToUpdate = new ArrayList<>(origDocFields.size()); @@ -956,7 +957,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState } else { // more normal path - Iterable<Document> blockDocs = cmd.getLuceneDocsIfNested(); + Iterable<Document> blockDocs = cmd.getLuceneDocsIfNested(schema); boolean isBlock = blockDocs != null; // AKA nested child docs Term idTerm = new Term(isBlock ? IndexSchema.ROOT_FIELD_NAME : idField.getName(), cmd.getIndexedId()); Term updateTerm = hasUpdateTerm ? 
cmd.updateTerm : idTerm; @@ -964,7 +965,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState log.debug("updateDocuments({})", cmd); writer.updateDocuments(updateTerm, blockDocs); } else { - Document luceneDocument = cmd.getLuceneDocument(); + Document luceneDocument = cmd.getLuceneDocument(schema); log.debug("updateDocument({})", cmd); writer.updateDocument(updateTerm, luceneDocument); } @@ -980,6 +981,10 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState } } + private IndexSchema getUpdateSchema(AddUpdateCommand cmd) { + return core.getUpdateSchema() != null ? core.getUpdateSchema() : cmd.getReq().getSchema(); + } + ///////////////////////////////////////////////////////////////////// // SolrInfoBean stuff: Statistics and Module Info diff --git a/solr/core/src/java/org/apache/solr/update/processor/ClassificationUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/ClassificationUpdateProcessor.java index 8ce98145c48..703445c2a30 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/ClassificationUpdateProcessor.java +++ b/solr/core/src/java/org/apache/solr/update/processor/ClassificationUpdateProcessor.java @@ -100,7 +100,7 @@ class ClassificationUpdateProcessor public void processAdd(AddUpdateCommand cmd) throws IOException { SolrInputDocument doc = cmd.getSolrInputDocument(); - Document luceneDocument = cmd.getLuceneDocument(); + Document luceneDocument = cmd.getLuceneDocument(cmd.getReq().getSchema()); String assignedClass; Object documentClass = doc.getFieldValue(trainingClassField); if (documentClass == null) { diff --git a/solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/conf/schema.xml b/solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/conf/schema.xml new file mode 100644 index 00000000000..54010924994 --- /dev/null +++ b/solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/conf/schema.xml @@ -0,0 +1,50 @@ + [XML markup lost in extraction: ASF license header plus a minimal test schema defining the field types and the fields used by the tests (identifier, value, text) without docValues; only the uniqueKey text survives] + <uniqueKey>identifier</uniqueKey> diff --git a/solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/conf/solrconfig.xml b/solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/conf/solrconfig.xml new file mode 100644 index 00000000000..f6718427632 --- /dev/null +++ b/solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/conf/solrconfig.xml @@ -0,0 +1,51 @@ + [ASF license header lost in extraction; the config body below is reconstructed from the surviving property strings, which match the stock cloud-minimal test solrconfig] + <config> + <dataDir>${solr.data.dir:}</dataDir> + <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> + <schemaFactory class="ClassicIndexSchemaFactory"/> + <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion> + <updateHandler class="solr.DirectUpdateHandler2"> + <commitWithin> + <softCommit>${solr.commitwithin.softcommit:true}</softCommit> + </commitWithin> + <updateLog></updateLog> + </updateHandler> + <requestHandler name="/select" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <str name="indent">true</str> + <str name="df">text</str> + </lst> + </requestHandler> + </config> diff --git a/solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/schema-withDocValues.xml b/solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/schema-withDocValues.xml new file mode 100644 index 00000000000..b8e6b1f095e --- /dev/null +++ b/solr/core/src/test-files/solr/configsets/cloud-minimal-extSchema-noDocValues/schema-withDocValues.xml @@ -0,0 +1,51 @@ + [XML markup lost in extraction: the same minimal schema with docValues enabled, including on the uniqueKey field; only the uniqueKey text survives] + <uniqueKey>identifier</uniqueKey> diff --git a/solr/core/src/test/org/apache/solr/cloud/TestAddDocumentToShardAfterSchemaUpdated.java b/solr/core/src/test/org/apache/solr/cloud/TestAddDocumentToShardAfterSchemaUpdated.java new file mode 100644 index 00000000000..b121f2756cc --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/TestAddDocumentToShardAfterSchemaUpdated.java @@ -0,0 +1,586 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.cloud; + +import java.io.File; +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.UpdateRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.CollectionAdminParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.zookeeper.KeeperException; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * We found a problem when adding documents to a shard whose schema is no longer the same as the collection's schema in ZooKeeper. + * + * This was the case when we started enabling docValues on indexes, but because of the large size of existing shards, + * reindexing the old data was not an option. + * + * The search system works fine with mixed shards (with and without docValues), but once you start adding documents to + * an older shard, search crashes when reaching these new documents. + * + * A fix was created by keeping a separate schema on the ConfigSet for when the ConfigSet in core.properties differs from the + * ConfigSet of the collection. This schema is only used for updating the Lucene index (document creation), and picking it up requires a restart of the core after + * the ConfigSet is modified. + * + * The tests: + * + * testAddDocumentAfterSchemaReplaced + * creates 2 shards with a schema, adds an updated version of the schema, and modifies the ConfigSet of the collection; + * restarts all nodes after the changes, adds more records to the old shard, restarts again, and optimizes so index segments get merged + * testAddDocumentAfterSchemaUpdated + * same as testAddDocumentAfterSchemaReplaced, only we don't add a new schema but update the original one. This gives errors after restarting, + * adding documents to the old shard, and optimizing the index; the exception happens when a search query returns one of the added documents + * testAddDocumentAfterSchemaReplacedNoRestart + * same as testAddDocumentAfterSchemaReplaced, but instead of restarting the nodes we reload the collection. 
+ * testAddDocumentAfterSchemaReplaceMixedShards + * same as testAddDocumentAfterSchemaReplaced, but we also add a new shard where we add docs with fields that only exist in the new schema. + */ +public class TestAddDocumentToShardAfterSchemaUpdated extends SolrCloudTestCase { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private static final String COLLECTION = "docValueTest_coll"; + private static final String ZK_CONFIG = "zk_docValueTest_config"; + private static final String ZK_CONFIG_DOCVAL = "zk_docValueTest_docval_config"; + private static final String ZK_CONFIG_UPDATE = "zk_docValueTest_update_config"; + + private static final String FIELD_IDENTIFIER = "identifier"; + private static final String FIELD_VALUE = "value"; + private static final String FIELD_TEXT = "text"; + private static final String FIELD_EXTRA = "extra"; // only in updated schema + + @BeforeClass + public static void setupCluster() throws Exception { + useFactory("solr.StandardDirectoryFactory"); // necessary to find the index+tlog intact after restart + configureCluster(3) + .addConfig(ZK_CONFIG, + TEST_PATH().resolve("configsets").resolve("cloud-minimal-extSchema-noDocValues").resolve("conf")) + .configure(); + } + + @Before + public void setUp() throws Exception { + super.setUp(); + cluster.deleteAllCollections(); + } + + @Test + public void testAddDocumentAfterSchemaReplaced() throws Exception { + CloudSolrClient cloudClient = cluster.getSolrClient(); + // create collection + CollectionAdminRequest.Create create = CollectionAdminRequest.createCollectionWithImplicitRouter(COLLECTION, ZK_CONFIG, "shard1,shard2", 2); + create.setMaxShardsPerNode(2); + cloudClient.request(create); + cluster.waitForActiveCollection(COLLECTION, 2, 4); + + // add documents + for (int i = 0; i < 10; i++) { + addDocument("i_" + i, "initial", "i_" + i + " added as initial document"); + } + cloudClient.commit(COLLECTION); + log.info("10 documents added - no docValues"); + + // find document + QueryResponse response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_5")); + SolrDocumentList docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + log.info("verified documents are searchable"); + + // restart solr + log.info("restart solr (not zookeeper)"); + restartSolr(5); + cluster.waitForActiveCollection(COLLECTION, 2, 4); + + // find document + response = cloudClient.query(COLLECTION, new SolrQuery("*:*")); + docs = response.getResults(); + assertEquals(10, docs.getNumFound()); + response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_5")); + docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + log.info("verified documents are searchable"); + + modifyCollectionSchema(cloudClient); + + // add document after schema change, no restart yet + addDocument("i_10", "after_change", "i_10 added as document after schema change but before restart"); + cloudClient.commit(COLLECTION); + log.info("added document after schema change but no restart"); + + // we still expect to find the document without a problem. 
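+ // (the running core still uses the schema it loaded at startup; the core-specific update schema only + // takes effect after the core is reloaded or restarted)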
+ response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_10")); + docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + + // restart solr + log.info("restart solr (not zookeeper)"); + restartSolr(5); + cluster.waitForActiveCollection(COLLECTION, 2, 4); + + log.info("adding document after schema change and restart"); + addDocument("i_11", "after_restart", "i_11 added as document after schema change and restart"); + cloudClient.commit(COLLECTION); + + response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_5")); + docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + log.info("verified documents are searchable"); + + response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_11")); + docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + SolrDocument doc = docs.get(0); + assertEquals("after_restart", doc.get(FIELD_VALUE)); + log.info("verified the new document is searchable after restart"); + + // let's restart again. + log.info("restart solr (not zookeeper)"); + restartSolr(5); + cluster.waitForActiveCollection(COLLECTION, 2, 4); + + response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_5")); + docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + doc = docs.get(0); + assertEquals("initial", doc.get(FIELD_VALUE)); + log.info("verified documents are searchable"); + + response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_11")); + docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + doc = docs.get(0); + assertEquals("after_restart", doc.get(FIELD_VALUE)); + log.info("verified the new document is searchable after restart"); + + cloudClient.optimize(COLLECTION); + log.info("optimized collection"); + + SolrQuery squery = new SolrQuery("*:*"); + squery.setRows(100); + response = cloudClient.query(COLLECTION, squery); + docs = response.getResults(); + log.info("found {} documents", docs.getNumFound()); + for (int doci = 0; doci < docs.size(); doci++) { + [loop body lost in extraction, apparently along with the whole testAddDocumentAfterSchemaUpdated test; what survives is the tail of a catch block: a comment that a schema -> docvalues issue will crash lucene and solr runners internal, then] + e.printStackTrace(); + } + } + + @Test + public void testAddDocumentAfterSchemaReplacedNoRestart() throws Exception { + CloudSolrClient cloudClient = cluster.getSolrClient(); + // create collection + CollectionAdminRequest.Create create = CollectionAdminRequest.createCollectionWithImplicitRouter(COLLECTION, ZK_CONFIG, "shard1,shard2", 2); + create.setMaxShardsPerNode(2); + cloudClient.request(create); + cluster.waitForActiveCollection(COLLECTION, 2, 4); + + // add documents + for (int i = 0; i < 10; i++) { + addDocument("i_" + i, "initial", "i_" + i + " added as initial document"); + } + cloudClient.commit(COLLECTION); + log.info("10 documents added - no docValues"); + + // find document + QueryResponse response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_5")); + SolrDocumentList docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + log.info("verified documents are searchable"); + + // restart solr + log.info("restart solr (not zookeeper)"); + restartSolr(5); + cluster.waitForActiveCollection(COLLECTION, 2, 4); + + // find document + response = cloudClient.query(COLLECTION, new SolrQuery("*:*")); + docs = response.getResults(); + assertEquals(10, docs.getNumFound()); + response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_5")); + docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + log.info("verified documents are searchable"); + + 
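+ // point the collection at the docValues ConfigSet (uploaded by the modifyCollectionSchema helper below), + // then reload the collection rather than restarting the nodes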
modifyCollectionSchema(cloudClient); + + // reload the collection instead of restarting + log.info("reload collection"); + CollectionAdminRequest.Reload reload = CollectionAdminRequest.reloadCollection(COLLECTION); + cloudClient.request(reload); + cluster.waitForActiveCollection(COLLECTION, 2, 4); + + // add documents after schema change, after reload + cluster.waitForActiveCollection(COLLECTION, 2, 4); + addDocument("i_10", "after_change", "i_10 added as document after schema change and reload"); + addDocument("i_11", "after_restart", "i_11 added as document after schema change and reload"); + cloudClient.commit(COLLECTION); + + response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_5")); + docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + log.info("verified documents are searchable"); + + response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_11")); + docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + SolrDocument doc = docs.get(0); + assertEquals("after_restart", doc.get(FIELD_VALUE)); + log.info("verified the new document is searchable after reload"); + + // let's restart again. + log.info("restart solr (not zookeeper)"); + restartSolr(5); + cluster.waitForActiveCollection(COLLECTION, 2, 4); + + response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_5")); + docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + doc = docs.get(0); + assertEquals("initial", doc.get(FIELD_VALUE)); + log.info("verified documents are searchable"); + + response = cloudClient.query(COLLECTION, new SolrQuery(FIELD_IDENTIFIER + ":i_11")); + docs = response.getResults(); + assertEquals(1, docs.getNumFound()); + doc = docs.get(0); + assertEquals("after_restart", doc.get(FIELD_VALUE)); + log.info("verified the new document is searchable after restart"); + + cloudClient.optimize(COLLECTION); + log.info("optimized collection"); + + SolrQuery squery = new SolrQuery("*:*"); + squery.setRows(100); + response = cloudClient.query(COLLECTION, squery); + docs = response.getResults(); + log.info("found {} documents", docs.getNumFound()); + for (int doci = 0; doci < docs.size(); doci++) { + [loop body lost in extraction, apparently along with the end of this test, the testAddDocumentAfterSchemaReplaceMixedShards test, and the head of the modifyCollectionSchema helper; the signature below is inferred from the call sites, which also imply the helper first uploads the docValues config (schema-withDocValues.xml) to ZooKeeper as ConfigSet ZK_CONFIG_DOCVAL before the surviving lines:] + private void modifyCollectionSchema(CloudSolrClient cloudClient) throws Exception { + Map<String, Object> param = new HashMap<>(); + param.put(CollectionAdminParams.COLL_CONF, ZK_CONFIG_DOCVAL); + CollectionAdminRequest.Modify modify = CollectionAdminRequest.modifyCollection(COLLECTION, param); + cloudClient.request(modify); + cluster.waitForActiveCollection(COLLECTION, 2, 4); + log.info("modified schema of collection to have doc values"); + } + + private void setUpdateSchema(String schema) + throws IOException, KeeperException, InterruptedException, SolrServerException { + // add updated schema and modify collection + String zkConfigPath = "/configs/" + ZK_CONFIG_UPDATE + "/solrconfig.xml"; + String zkSchemaPath = "/configs/" + ZK_CONFIG_UPDATE + "/schema.xml"; + String config = FileUtils.readFileToString(new File(TEST_PATH().resolve("configsets").resolve("cloud-minimal-extSchema-noDocValues").resolve("conf").toFile(), "solrconfig.xml"), StandardCharsets.UTF_8); + createZkPath(zkConfigPath); + cluster.getZkClient().setData(zkConfigPath, config.getBytes(StandardCharsets.UTF_8), true); + createZkPath(zkSchemaPath); + cluster.getZkClient().setData(zkSchemaPath, schema.getBytes(StandardCharsets.UTF_8), true); + log.info("added update schema (and config)"); + Map<String, Object> param = new HashMap<>(); + param.put(CollectionAdminParams.COLL_CONF, ZK_CONFIG_UPDATE); + CollectionAdminRequest.Modify modify = CollectionAdminRequest.modifyCollection(COLLECTION, param); + cluster.getSolrClient().request(modify); + log.info("modified collection to use the updated schema"); + } + + private void createZkPath(String zkConfigPath) throws KeeperException, 
InterruptedException { + if (!cluster.getZkClient().exists(zkConfigPath, false)) { + cluster.getZkClient().makePath(zkConfigPath, false); + } + } + + private void restartSolr(int timeoutSeconds) throws Exception { + for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { + jetty.stop(); + } + for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { + jetty.start(true); + } + cluster.waitForAllNodes(timeoutSeconds); + } + + protected void addDocument(String identifier, String value, String text) throws SolrServerException, IOException { + SolrInputDocument doc = new SolrInputDocument(); + + addFields(doc, FIELD_IDENTIFIER, identifier, FIELD_VALUE, value, FIELD_TEXT, text, "_route_", "shard1"); + + UpdateRequest ureq = new UpdateRequest(); + ureq.add(doc); + ModifiableSolrParams params = new ModifiableSolrParams(); + ureq.setParams(params); + ureq.process(cluster.getSolrClient(), COLLECTION); + } + + protected void addDocumentUpdateShard(String identifier, String value, String extra, String text) throws SolrServerException, IOException { + SolrInputDocument doc = new SolrInputDocument(); + + addFields(doc, FIELD_IDENTIFIER, identifier, FIELD_VALUE, value, FIELD_TEXT, text, FIELD_EXTRA, extra, "_route_", "shard3"); + + UpdateRequest ureq = new UpdateRequest(); + ureq.add(doc); + ModifiableSolrParams params = new ModifiableSolrParams(); + ureq.setParams(params); + ureq.process(cluster.getSolrClient(), COLLECTION); + } + + // adds key/value pairs to the document before indexing + protected static void addFields(SolrInputDocument doc, Object... fields) { + for (int i = 0; i < fields.length; i += 2) { + doc.addField((String) (fields[i]), fields[i + 1]); + } + } +} + diff --git a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java index 50cbb988b11..0ad68b6675b 100644 --- a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java +++ b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java @@ -208,7 +208,7 @@ public class TestCodecSupport extends SolrTestCaseJ4 { CoreDescriptor cd = new CoreDescriptor(newCoreName, testSolrHome.resolve(newCoreName), coreContainer.getContainerProperties(), coreContainer.isZooKeeperAware()); c = new SolrCore(coreContainer, cd, - new ConfigSet("fakeConfigset", config, schema, null, true)); + new ConfigSet("fakeConfigset", config, schema, null, null, true)); assertNull(coreContainer.registerCore(cd, c, false, false)); h.coreName = newCoreName; assertEquals("We are not using the correct core", "solrconfig_codec2.xml", h.getCore().getConfigResource()); -- 2.37.3
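For illustration (editorial addition, not part of the patch; all property values are hypothetical): the core-specific schema path is taken when the ConfigSet recorded in a core's core.properties, read via CollectionAdminParams.COLL_CONF (the collection.configName property), differs from the ConfigSet the collection currently points at. A core.properties along these lines would make such a core keep building Lucene documents with its original schema while newer shards follow the collection's updated ConfigSet:

name=docValueTest_coll_shard1_replica_n1
collection=docValueTest_coll
shard=shard1
# ConfigSet this core was created with; differs from the collection's current configName
collection.configName=zk_docValueTest_config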