@@ -78,6 +78,31 @@ public class ContentIndexer {
7878
7979 private static final int MEDIA_FILE_SIZE_LIMIT = 300 * 1024 ; // Bytes
8080 private static final int NANOSECONDS_IN_A_MILLISECOND = 1000000 ;
81+ private static final String ERROR_OCCURRED_SUFFIX = ". The following error occurred: " ;
82+
83+ private static class IndexingContext {
84+ final Map <String , Content > contentCache ;
85+ final Set <String > tagsList ;
86+ final Map <String , String > allUnits ;
87+ final Map <String , String > publishedUnits ;
88+ final Map <Content , List <String >> indexProblemCache ;
89+ final boolean includeUnpublished ;
90+
91+ IndexingContext (final Map <String , Content > contentCache , final Set <String > tagsList ,
92+ final Map <String , String > allUnits , final Map <String , String > publishedUnits ,
93+ final Map <Content , List <String >> indexProblemCache , final boolean includeUnpublished ) {
94+ this .contentCache = contentCache ;
95+ this .tagsList = tagsList ;
96+ this .allUnits = allUnits ;
97+ this .publishedUnits = publishedUnits ;
98+ this .indexProblemCache = indexProblemCache ;
99+ this .includeUnpublished = includeUnpublished ;
100+ }
101+
102+ boolean shouldSkipUnpublished (final Content content ) {
103+ return !includeUnpublished && !content .getPublished ();
104+ }
105+ }
81106
82107 @ Inject
83108 public ContentIndexer (final GitDb database , final ElasticSearchIndexer es , final ContentMapperUtils mapperUtils ) {
@@ -212,46 +237,10 @@ private synchronized void buildGitContentIndex(final String sha,
212237 log .info ("Populating git content cache based on sha " + sanitiseInternalLogValue (sha ) + " ..." );
213238
214239 // Traverse the git repository looking for the .json files
240+ IndexingContext context = new IndexingContext (contentCache , tagsList , allUnits , publishedUnits ,
241+ indexProblemCache , includeUnpublished );
215242 while (treeWalk .next ()) {
216- ByteArrayOutputStream out = new ByteArrayOutputStream ();
217- ObjectLoader loader = repository .open (treeWalk .getObjectId (0 ));
218- loader .copyTo (out );
219-
220- // setup object mapper to use preconfigured deserializer
221- // module. Required to deal with type polymorphism
222- ObjectMapper objectMapper = mapperUtils .getSharedContentObjectMapper ();
223-
224- Content content ;
225- try {
226- content = (Content ) objectMapper .readValue (out .toString (), ContentBase .class );
227-
228- // check if we only want to index published content
229- if (!includeUnpublished && !content .getPublished ()) {
230- log .debug ("Skipping unpublished content: {}" , content .getId ());
231- continue ;
232- }
233-
234- content = this .augmentChildContent (content , treeWalk .getPathString (), null , content .getPublished ());
235-
236- if (null != content ) {
237- indexContentObject (contentCache , tagsList , allUnits , publishedUnits , indexProblemCache ,
238- treeWalk .getPathString (), content );
239- }
240- } catch (JsonMappingException e ) {
241- log .debug (String .format ("Unable to parse the json file found %s as a content object. "
242- + "Skipping file due to error: \n %s" , treeWalk .getPathString (), e .getMessage ()));
243- Content dummyContent = new Content ();
244- dummyContent .setCanonicalSourceFile (treeWalk .getPathString ());
245- this .registerContentProblem (dummyContent , "Index failure - Unable to parse json file found - "
246- + treeWalk .getPathString () + ". The following error occurred: " + e .getMessage (), indexProblemCache );
247- } catch (IOException e ) {
248- log .error ("IOException while trying to parse {}" , treeWalk .getPathString (), e );
249- Content dummyContent = new Content ();
250- dummyContent .setCanonicalSourceFile (treeWalk .getPathString ());
251- this .registerContentProblem (dummyContent ,
252- "Index failure - Unable to read the json file found - " + treeWalk .getPathString ()
253- + ". The following error occurred: " + e .getMessage (), indexProblemCache );
254- }
243+ processJsonFile (treeWalk , repository , context );
255244 }
256245
257246 repository .close ();
@@ -265,6 +254,54 @@ private synchronized void buildGitContentIndex(final String sha,
265254 }
266255 }
267256
257+ private void processJsonFile (final TreeWalk treeWalk , final Repository repository ,
258+ final IndexingContext context ) {
259+ try {
260+ ByteArrayOutputStream out = new ByteArrayOutputStream ();
261+ ObjectLoader loader = repository .open (treeWalk .getObjectId (0 ));
262+ loader .copyTo (out );
263+
264+ ObjectMapper objectMapper = mapperUtils .getSharedContentObjectMapper ();
265+
266+ try {
267+ Content content = (Content ) objectMapper .readValue (out .toString (), ContentBase .class );
268+
269+ if (context .shouldSkipUnpublished (content )) {
270+ log .debug ("Skipping unpublished content: {}" , content .getId ());
271+ return ;
272+ }
273+
274+ content = this .augmentChildContent (content , treeWalk .getPathString (), null , content .getPublished ());
275+
276+ if (null != content ) {
277+ indexContentObject (context .contentCache , context .tagsList , context .allUnits , context .publishedUnits ,
278+ context .indexProblemCache , treeWalk .getPathString (), content );
279+ }
280+ } catch (JsonMappingException e ) {
281+ log .debug (String .format ("Unable to parse the json file found %s as a content object. "
282+ + "Skipping file due to error: \n %s" , treeWalk .getPathString (), e .getMessage ()));
283+ Content dummyContent = new Content ();
284+ dummyContent .setCanonicalSourceFile (treeWalk .getPathString ());
285+ this .registerContentProblem (dummyContent , "Index failure - Unable to parse json file found - "
286+ + treeWalk .getPathString () + ERROR_OCCURRED_SUFFIX + e .getMessage (), context .indexProblemCache );
287+ } catch (IOException e ) {
288+ log .error ("IOException while trying to parse {}" , treeWalk .getPathString (), e );
289+ Content dummyContent = new Content ();
290+ dummyContent .setCanonicalSourceFile (treeWalk .getPathString ());
291+ this .registerContentProblem (dummyContent ,
292+ "Index failure - Unable to read the json file found - " + treeWalk .getPathString ()
293+ + ERROR_OCCURRED_SUFFIX + e .getMessage (), context .indexProblemCache );
294+ }
295+ } catch (Exception e ) {
296+ log .error ("Unexpected error while processing file {}: {}" , treeWalk .getPathString (), e .getMessage (), e );
297+ Content dummyContent = new Content ();
298+ dummyContent .setCanonicalSourceFile (treeWalk .getPathString ());
299+ this .registerContentProblem (dummyContent ,
300+ "Index failure - Unexpected error while processing file - " + treeWalk .getPathString ()
301+ + ERROR_OCCURRED_SUFFIX + e .getMessage (), context .indexProblemCache );
302+ }
303+ }
304+
268305 private void indexContentObject (
269306 final Map <String , Content > contentCache , final Set <String > tagsList , final Map <String , String > allUnits ,
270307 final Map <String , String > publishedUnits , final Map <Content , List <String >> indexProblemCache ,
@@ -277,82 +314,69 @@ private void indexContentObject(
277314
278315 // add children (and parent) from flattened Set to
279316 // cache if they have ids
317+ IndexingContext context = new IndexingContext (contentCache , tagsList , allUnits , publishedUnits ,
318+ indexProblemCache , true );
280319 for (Content flattenedContent : this .flattenContentObjects (content )) {
281- if (flattenedContent . getId () == null ) {
282- continue ;
283- }
320+ validateAndCacheContent (flattenedContent , content , treeWalkPath , context );
321+ }
322+ }
284323
285- // Prevents ETL indexing of quizzes that contain anything that is not an IsaacQuizSection
286- // in the top-level children array.
287- // NOTE: I'm not sure if this is the right place for this but I couldn't find a better one.
288- // This also seems to be the only time we can prevent a file from being indexed entirely.
289- if (flattenedContent instanceof IsaacQuiz ) {
290- List <ContentBase > children = flattenedContent .getChildren ();
291- if (children .stream ().anyMatch (c -> !(c instanceof IsaacQuizSection ))) {
292- log .debug ("IsaacQuiz ({}) contains top-level non-quiz sections. Skipping." , flattenedContent .getId ());
293- this .registerContentProblem (flattenedContent , "Index failure - Invalid "
294- + "content type among quiz sections. Quizzes can only contain quiz sections "
295- + "in the top-level children array." , indexProblemCache );
296- continue ;
297- }
298- }
324+ private void validateAndCacheContent (final Content flattenedContent , final Content parentContent ,
325+ final String treeWalkPath , final IndexingContext context ) {
326+ if (flattenedContent .getId () == null ) {
327+ return ;
328+ }
299329
300- if (flattenedContent .getId ().length () > MAXIMUM_CONTENT_ID_LENGTH ) {
301- log .debug ("Content ID too long: {}" , flattenedContent .getId ());
302- this .registerContentProblem (flattenedContent , "Content ID too long: " + flattenedContent .getId (),
303- indexProblemCache );
304- continue ;
330+ if (flattenedContent instanceof IsaacQuiz ) {
331+ List <ContentBase > children = flattenedContent .getChildren ();
332+ if (children .stream ().anyMatch (c -> !(c instanceof IsaacQuizSection ))) {
333+ log .debug ("IsaacQuiz ({}) contains top-level non-quiz sections. Skipping." , flattenedContent .getId ());
334+ this .registerContentProblem (flattenedContent , "Index failure - Invalid "
335+ + "content type among quiz sections. Quizzes can only contain quiz sections "
336+ + "in the top-level children array." , context .indexProblemCache );
337+ return ;
305338 }
339+ }
306340
307- if (flattenedContent .getId ().contains ("." )) {
308- // Otherwise, duplicate IDs with different content,
309- // therefore log an error
310- log .debug ("Resource with invalid ID ({}) detected in cache. Skipping {}" , content .getId (), treeWalkPath );
311-
312- this .registerContentProblem (flattenedContent , "Index failure - Invalid ID "
313- + flattenedContent .getId () + " found in file " + treeWalkPath
314- + ". Must not contain restricted characters." , indexProblemCache );
315- continue ;
316- }
341+ if (flattenedContent .getId ().length () > MAXIMUM_CONTENT_ID_LENGTH ) {
342+ log .debug ("Content ID too long: {}" , flattenedContent .getId ());
343+ this .registerContentProblem (flattenedContent , "Content ID too long: " + flattenedContent .getId (),
344+ context .indexProblemCache );
345+ return ;
346+ }
317347
318- // check if we have seen this key before if
319- // we have then we don't want to add it
320- // again
321- if (!contentCache .containsKey (flattenedContent .getId ())) {
322- // It must be new so we can add it
323- log .debug ("Loading into cache: {} ({}) from {}" , flattenedContent .getId (), flattenedContent .getType (),
324- treeWalkPath );
325- contentCache .put (flattenedContent .getId (), flattenedContent );
326- registerTags (flattenedContent .getTags (), tagsList );
327-
328- // If this is a numeric question, extract any
329- // units from its answers.
330-
331- if (flattenedContent instanceof IsaacNumericQuestion ) {
332- registerUnits ((IsaacNumericQuestion ) flattenedContent , allUnits , publishedUnits );
333- }
348+ if (flattenedContent .getId ().contains ("." )) {
349+ log .debug ("Resource with invalid ID ({}) detected in cache. Skipping {}" , parentContent .getId (), treeWalkPath );
350+ this .registerContentProblem (flattenedContent , "Index failure - Invalid ID "
351+ + flattenedContent .getId () + " found in file " + treeWalkPath
352+ + ". Must not contain restricted characters." , context .indexProblemCache );
353+ return ;
354+ }
334355
335- continue ; // our work here is done
336- }
356+ if (!context .contentCache .containsKey (flattenedContent .getId ())) {
357+ log .debug ("Loading into cache: {} ({}) from {}" , flattenedContent .getId (), flattenedContent .getType (),
358+ treeWalkPath );
359+ context .contentCache .put (flattenedContent .getId (), flattenedContent );
360+ registerTags (flattenedContent .getTags (), context .tagsList );
337361
338- // shaCache contains key already, compare the
339- // content
340- if (contentCache .get (flattenedContent .getId ()).equals (flattenedContent )) {
341- // content is the same therefore it is just
342- // reuse of a content object so that is
343- // fine.
344- log .debug ("Resource ({}) already seen in cache. Skipping {}" , content .getId (), treeWalkPath );
345- continue ;
362+ if (flattenedContent instanceof IsaacNumericQuestion ) {
363+ registerUnits ((IsaacNumericQuestion ) flattenedContent , context .allUnits , context .publishedUnits );
346364 }
365+ return ;
366+ }
347367
348- // Otherwise, duplicate IDs with different content,
349- // therefore log an error
350- log .debug ("Resource with duplicate ID ({}) detected in cache. Skipping {}" , content .getId (), treeWalkPath );
351- this .registerContentProblem (flattenedContent , String .format (
352- "Index failure - Duplicate ID (%s) found in files (%s) and (%s): only one will be available." ,
353- content .getId (), treeWalkPath , contentCache .get (flattenedContent .getId ()).getCanonicalSourceFile ()),
354- indexProblemCache );
368+ if (context .contentCache .get (flattenedContent .getId ()).equals (flattenedContent )) {
369+ log .debug ("Resource ({}) already seen in cache. Skipping {}" , parentContent .getId (), treeWalkPath );
370+ return ;
355371 }
372+
373+ log .debug ("Resource with duplicate ID ({}) detected in cache. Skipping {}" , parentContent .getId (), treeWalkPath );
374+ this .registerContentProblem (flattenedContent , String .format (
375+ "Index failure - Duplicate ID (%s) found in files (%s) and (%s): only one will be available." ,
376+ parentContent .getId (),
377+ treeWalkPath ,
378+ context .contentCache .get (flattenedContent .getId ()).getCanonicalSourceFile ()),
379+ context .indexProblemCache );
356380 }
357381
358382 /**
@@ -419,44 +443,7 @@ private Content augmentChildContent(final Content content, final String canonica
419443 }
420444
421445 if (content instanceof Question ) {
422- Question question = (Question ) content ;
423- if (question .getHints () != null ) {
424- for (ContentBase cb : question .getHints ()) {
425- Content c = (Content ) cb ;
426- this .augmentChildContent (c , canonicalSourceFile , newParentId , parentPublished );
427- }
428- }
429-
430- // Augment question answers
431- if (question .getAnswer () != null ) {
432- Content answer = (Content ) question .getAnswer ();
433- if (answer .getChildren () != null ) {
434- for (ContentBase cb : answer .getChildren ()) {
435- Content c = (Content ) cb ;
436- this .augmentChildContent (c , canonicalSourceFile , newParentId , parentPublished );
437- }
438- }
439- }
440-
441- if (question .getDefaultFeedback () != null ) {
442- Content defaultFeedback = question .getDefaultFeedback ();
443- if (defaultFeedback .getChildren () != null ) {
444- for (ContentBase cb : defaultFeedback .getChildren ()) {
445- Content c = (Content ) cb ;
446- this .augmentChildContent (c , canonicalSourceFile , newParentId , parentPublished );
447- }
448- }
449- }
450-
451- if (content instanceof ChoiceQuestion ) {
452- ChoiceQuestion choiceQuestion = (ChoiceQuestion ) content ;
453- if (choiceQuestion .getChoices () != null ) {
454- for (ContentBase cb : choiceQuestion .getChoices ()) {
455- Content c = (Content ) cb ;
456- this .augmentChildContent (c , canonicalSourceFile , newParentId , parentPublished );
457- }
458- }
459- }
446+ augmentQuestionContent ((Question ) content , canonicalSourceFile , newParentId , parentPublished );
460447 }
461448
462449 // try to determine if we have media as fields to deal with in this class
@@ -521,10 +508,68 @@ private void collateSearchableContent(final Content content, final StringBuilder
521508 /**
522509 * Concatenate the source of a media content object with its parent source file.
523510 *
524- * @param canonicalSourceFile the canonical path to use for concat operations.
525- * @param originalSrc to modify
511+ * @param canonicalSourceFile the canonical path to use for concat operations
526512 * @return src with relative paths fixed.
527513 */
514+ private void augmentQuestionContent (final Question question , final String canonicalSourceFile ,
515+ final String newParentId , final boolean parentPublished ) {
516+ augmentHints (question , canonicalSourceFile , newParentId , parentPublished );
517+ augmentAnswerContent (question , canonicalSourceFile , newParentId , parentPublished );
518+ augmentFeedbackContent (question , canonicalSourceFile , newParentId , parentPublished );
519+ augmentChoiceQuestionContent (question , canonicalSourceFile , newParentId , parentPublished );
520+ }
521+
522+ private void augmentHints (final Question question , final String canonicalSourceFile , final String newParentId ,
523+ final boolean parentPublished ) {
524+ if (question .getHints () != null ) {
525+ for (ContentBase cb : question .getHints ()) {
526+ Content c = (Content ) cb ;
527+ this .augmentChildContent (c , canonicalSourceFile , newParentId , parentPublished );
528+ }
529+ }
530+ }
531+
532+ private void augmentAnswerContent (final Question question , final String canonicalSourceFile , final String newParentId ,
533+ final boolean parentPublished ) {
534+ if (question .getAnswer () != null ) {
535+ Content answer = (Content ) question .getAnswer ();
536+ if (answer .getChildren () != null ) {
537+ for (ContentBase cb : answer .getChildren ()) {
538+ Content c = (Content ) cb ;
539+ this .augmentChildContent (c , canonicalSourceFile , newParentId , parentPublished );
540+ }
541+ }
542+ }
543+ }
544+
545+ private void augmentFeedbackContent (final Question question ,
546+ final String canonicalSourceFile ,
547+ final String newParentId ,
548+ final boolean parentPublished ) {
549+ if (question .getDefaultFeedback () != null ) {
550+ Content defaultFeedback = question .getDefaultFeedback ();
551+ if (defaultFeedback .getChildren () != null ) {
552+ for (ContentBase cb : defaultFeedback .getChildren ()) {
553+ Content c = (Content ) cb ;
554+ this .augmentChildContent (c , canonicalSourceFile , newParentId , parentPublished );
555+ }
556+ }
557+ }
558+ }
559+
560+ private void augmentChoiceQuestionContent (final Question question , final String canonicalSourceFile ,
561+ final String newParentId , final boolean parentPublished ) {
562+ if (question instanceof ChoiceQuestion ) {
563+ ChoiceQuestion choiceQuestion = (ChoiceQuestion ) question ;
564+ if (choiceQuestion .getChoices () != null ) {
565+ for (ContentBase cb : choiceQuestion .getChoices ()) {
566+ Content c = (Content ) cb ;
567+ this .augmentChildContent (c , canonicalSourceFile , newParentId , parentPublished );
568+ }
569+ }
570+ }
571+ }
572+
528573 private String fixMediaSrc (final String canonicalSourceFile , final String originalSrc ) {
529574 if (originalSrc != null && !(originalSrc .startsWith ("http://" ) || originalSrc .startsWith ("https://" )
530575 || originalSrc .startsWith ("/assets/" ))) {
0 commit comments