|
23 | 23 | import java.time.format.DateTimeParseException; |
24 | 24 | import java.util.List; |
25 | 25 | import java.util.Optional; |
| 26 | +import java.util.function.Predicate; |
26 | 27 | import java.util.stream.Collectors; |
27 | 28 |
|
28 | 29 | import javax.annotation.Nonnull; |
@@ -327,8 +328,33 @@ public long getViewCount() throws ParsingException { |
327 | 328 | return -1; |
328 | 329 | } |
329 | 330 |
|
330 | | - final Optional<String> optTextContent = metadataPart(1, 0) |
331 | | - .map(this::getTextContentFromMetadataPart); |
| 331 | + // Search the info metadata row for text that looks like a view count. |
| 332 | + // YouTube uses 2 rows [author][views,date] for stream items outside channels |
| 333 | + // and 1 row in channels [views,date], so the views text could be in any part |
| 334 | + // of the info row. |
| 335 | + final int infoRowIndex = getInfoMetadataRowIndex(); |
| 336 | + Optional<String> optTextContent = findMetadataPartInRow(infoRowIndex, text -> { |
| 337 | + final String lower = text.toLowerCase(); |
| 338 | + return lower.matches(".*\\bviews?\\b.*") || lower.contains("watching") |
| 339 | + || lower.contains("recommended") || lower.contains(NO_VIEWS_LOWERCASE); |
| 340 | + }); |
| 341 | + |
| 342 | + // Fallback: search all rows. Handles livestreams with only 1 metadata row |
| 343 | + // in search/related/kiosk contexts, where that single row contains views. |
| 344 | + if (optTextContent.isEmpty()) { |
| 345 | + optTextContent = findMetadataPartInAllRows(text -> { |
| 346 | + final String lower = text.toLowerCase(); |
| 347 | + return lower.matches(".*\\bviews?\\b.*") || lower.contains("watching") |
| 348 | + || lower.contains("recommended") || lower.contains(NO_VIEWS_LOWERCASE); |
| 349 | + }); |
| 350 | + } |
| 351 | + |
| 352 | + // Fallback to original position if heuristic didn't match |
| 353 | + if (optTextContent.isEmpty()) { |
| 354 | + optTextContent = metadataPart(infoRowIndex, 0) |
| 355 | + .map(this::getTextContentFromMetadataPart); |
| 356 | + } |
| 357 | + |
332 | 358 | // We could do this inline if the ParsingException would be a RuntimeException -.- |
333 | 359 | if (optTextContent.isPresent()) { |
334 | 360 | return getViewCountFromViewCountText(optTextContent.get()); |
@@ -410,14 +436,88 @@ private String getTextContentFromMetadataPart(final JsonObject metadataPart) { |
410 | 436 | return metadataPart.getObject("text").getString("content"); |
411 | 437 | } |
412 | 438 |
|
| 439 | + /** |
| 440 | + * Returns the index of the metadata row containing view count and date info. |
| 441 | + * YouTube uses 2 rows [author][views,date] for stream items outside channels |
| 442 | + * and 1 row in channels [views,date] (as they don't return uploader info). |
| 443 | + */ |
| 444 | + protected int getInfoMetadataRowIndex() { |
| 445 | + return 1; |
| 446 | + } |
| 447 | + |
| 448 | + /** |
| 449 | + * Searches the metadata parts of a specific row for text matching the given predicate. |
| 450 | + * This handles variable part order (e.g. [views, date] vs [date, views]) within a row. |
| 451 | + */ |
| 452 | + private Optional<String> findMetadataPartInRow(final int rowIndex, |
| 453 | + @Nonnull final Predicate<String> predicate) |
| 454 | + throws ParsingException { |
| 455 | + if (cachedMetadataRows == null) { |
| 456 | + cachedMetadataRows = JsonUtils.getArray(lockupViewModel, |
| 457 | + "metadata.lockupMetadataViewModel.metadata" |
| 458 | + + ".contentMetadataViewModel.metadataRows"); |
| 459 | + } |
| 460 | + return cachedMetadataRows |
| 461 | + .streamAsJsonObjects() |
| 462 | + .skip(rowIndex) |
| 463 | + .limit(1) |
| 464 | + .flatMap(jsonObject -> jsonObject.getArray("metadataParts") |
| 465 | + .streamAsJsonObjects()) |
| 466 | + .map(this::getTextContentFromMetadataPart) |
| 467 | + .filter(predicate) |
| 468 | + .findFirst(); |
| 469 | + } |
| 470 | + |
| 471 | + /** |
| 472 | + * Searches all metadata rows for text matching the given predicate. |
| 473 | + * Used as a fallback when the info row doesn't contain the expected data, |
| 474 | + * e.g. for livestreams with only 1 metadata row in search results. |
| 475 | + */ |
| 476 | + private Optional<String> findMetadataPartInAllRows(@Nonnull final Predicate<String> predicate) |
| 477 | + throws ParsingException { |
| 478 | + if (cachedMetadataRows == null) { |
| 479 | + cachedMetadataRows = JsonUtils.getArray(lockupViewModel, |
| 480 | + "metadata.lockupMetadataViewModel.metadata" |
| 481 | + + ".contentMetadataViewModel.metadataRows"); |
| 482 | + } |
| 483 | + return cachedMetadataRows |
| 484 | + .streamAsJsonObjects() |
| 485 | + .flatMap(jsonObject -> jsonObject.getArray("metadataParts") |
| 486 | + .streamAsJsonObjects()) |
| 487 | + .map(this::getTextContentFromMetadataPart) |
| 488 | + .filter(predicate) |
| 489 | + .findFirst(); |
| 490 | + } |
| 491 | + |
413 | 492 | private boolean isLive() throws ParsingException { |
414 | 493 | return getStreamType() != StreamType.VIDEO_STREAM; |
415 | 494 | } |
416 | 495 |
|
417 | 496 | private Optional<String> getDateText() throws ParsingException { |
418 | 497 | if (cachedDateText == null) { |
419 | | - cachedDateText = metadataPart(1, 1) |
420 | | - .map(this::getTextContentFromMetadataPart); |
| 498 | + // YouTube uses 2 rows [author][views,date] for stream items outside channels |
| 499 | + // and 1 row in channels [views,date] (as they don't return uploader info in them), |
| 500 | + // so the date text could be in any part of the info row. |
| 501 | + final int infoRowIndex = getInfoMetadataRowIndex(); |
| 502 | + cachedDateText = findMetadataPartInRow(infoRowIndex, text -> |
| 503 | + text.endsWith("ago") || text.contains(PREMIERES_TEXT)); |
| 504 | + |
| 505 | + // Fallback: search all rows. Handles livestreams with only 1 metadata row |
| 506 | + // in search/related/kiosk contexts, where that single row may contain the date. |
| 507 | + if (cachedDateText.isEmpty()) { |
| 508 | + cachedDateText = findMetadataPartInAllRows(text -> |
| 509 | + text.endsWith("ago") || text.contains(PREMIERES_TEXT)); |
| 510 | + } |
| 511 | + |
| 512 | + // Fallback to original positions if heuristic didn't match |
| 513 | + if (cachedDateText.isEmpty()) { |
| 514 | + cachedDateText = metadataPart(infoRowIndex, 1) |
| 515 | + .map(this::getTextContentFromMetadataPart); |
| 516 | + } |
| 517 | + if (cachedDateText.isEmpty()) { |
| 518 | + cachedDateText = metadataPart(0, 1) |
| 519 | + .map(this::getTextContentFromMetadataPart); |
| 520 | + } |
421 | 521 | } |
422 | 522 | return cachedDateText; |
423 | 523 | } |
|
0 commit comments