Skip to content

Commit 585ccde

Browse files
authored
Merge pull request #1492 from Ecomont/fix/lockup-channel-tab-upload-dates
[YouTube] Fix date/view extraction and add tests for lockupViewModel in channel tabs
2 parents 6be2d10 + 5f57172 commit 585ccde

21 files changed

Lines changed: 2981 additions & 6066 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import com.grack.nanojson.JsonArray;
44
import com.grack.nanojson.JsonObject;
55
import com.grack.nanojson.JsonWriter;
6+
import org.schabi.newpipe.extractor.Image;
67
import org.schabi.newpipe.extractor.InfoItem;
78
import org.schabi.newpipe.extractor.MultiInfoItemsCollector;
89
import org.schabi.newpipe.extractor.Page;
@@ -282,6 +283,9 @@ private Optional<JsonObject> collectItem(@Nonnull final MultiInfoItemsCollector
282283
} else if (richItem.has("playlistRenderer")) {
283284
commitPlaylist(collector, richItem.getObject("playlistRenderer"),
284285
channelVerifiedStatus, channelName, channelUrl);
286+
} else if (richItem.has("lockupViewModel")) {
287+
commitLockup(collector, channelVerifiedStatus, channelName, channelUrl,
288+
timeAgoParser, richItem);
285289
}
286290
} else if (item.has("gridVideoRenderer")) {
287291
commitVideo(collector, timeAgoParser, item.getObject("gridVideoRenderer"),
@@ -306,23 +310,33 @@ private Optional<JsonObject> collectItem(@Nonnull final MultiInfoItemsCollector
306310
return collectItemsFrom(collector, item.getObject("expandedShelfContentsRenderer")
307311
.getArray("items"), channelVerifiedStatus, channelName, channelUrl);
308312
} else if (item.has("lockupViewModel")) {
309-
final JsonObject lockupViewModel = item.getObject("lockupViewModel");
310-
final String contentType = lockupViewModel.getString("contentType");
311-
if ("LOCKUP_CONTENT_TYPE_PLAYLIST".equals(contentType)
312-
|| "LOCKUP_CONTENT_TYPE_PODCAST".equals(contentType)) {
313-
commitPlaylistLockup(collector, lockupViewModel, channelVerifiedStatus,
314-
channelName, channelUrl);
315-
} else if ("LOCKUP_CONTENT_TYPE_VIDEO".equals(contentType)) {
316-
commitVideoLockup(collector, timeAgoParser, lockupViewModel, channelVerifiedStatus,
317-
channelName, channelUrl);
318-
}
313+
commitLockup(collector, channelVerifiedStatus, channelName, channelUrl, timeAgoParser,
314+
item);
319315
} else if (item.has("continuationItemRenderer")) {
320316
return Optional.ofNullable(item.getObject("continuationItemRenderer"));
321317
}
322318

323319
return Optional.empty();
324320
}
325321

322+
private void commitLockup(@Nonnull final MultiInfoItemsCollector collector,
323+
@Nonnull final VerifiedStatus channelVerifiedStatus,
324+
@Nullable final String channelName,
325+
@Nullable final String channelUrl,
326+
@Nonnull final TimeAgoParser timeAgoParser,
327+
@Nonnull final JsonObject richItem) {
328+
final JsonObject lockupViewModel = richItem.getObject("lockupViewModel");
329+
final String contentType = lockupViewModel.getString("contentType");
330+
if ("LOCKUP_CONTENT_TYPE_PLAYLIST".equals(contentType)
331+
|| "LOCKUP_CONTENT_TYPE_PODCAST".equals(contentType)) {
332+
commitPlaylistLockup(collector, lockupViewModel, channelVerifiedStatus,
333+
channelName, channelUrl);
334+
} else if ("LOCKUP_CONTENT_TYPE_VIDEO".equals(contentType)) {
335+
commitVideoLockup(collector, timeAgoParser, lockupViewModel,
336+
channelVerifiedStatus, channelName, channelUrl);
337+
}
338+
}
339+
326340
private static void commitReel(@Nonnull final MultiInfoItemsCollector collector,
327341
@Nonnull final JsonObject reelItemRenderer,
328342
@Nonnull final VerifiedStatus channelVerifiedStatus,
@@ -379,6 +393,22 @@ private static void commitVideoLockup(@Nonnull final MultiInfoItemsCollector col
379393
@Nullable final String channelUrl) {
380394
collector.commit(
381395
new YoutubeStreamInfoItemLockupExtractor(lockupViewModel, timeAgoParser) {
396+
/**
397+
* Channel tabs use a 1-row metadata format [views, date]
398+
* instead of 2 rows [author][views, date].
399+
*/
400+
@Override
401+
protected int getInfoMetadataRowIndex() {
402+
return 0;
403+
}
404+
405+
@Nonnull
406+
@Override
407+
public List<Image> getUploaderAvatars() throws ParsingException {
408+
// Uploader avatars are not available in channels' video items
409+
return List.of();
410+
}
411+
382412
@Override
383413
public String getUploaderName() throws ParsingException {
384414
return isNullOrEmpty(channelName) ? super.getUploaderName() : channelName;

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemLockupExtractor.java

Lines changed: 104 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.time.format.DateTimeParseException;
2424
import java.util.List;
2525
import java.util.Optional;
26+
import java.util.function.Predicate;
2627
import java.util.stream.Collectors;
2728

2829
import javax.annotation.Nonnull;
@@ -327,8 +328,33 @@ public long getViewCount() throws ParsingException {
327328
return -1;
328329
}
329330

330-
final Optional<String> optTextContent = metadataPart(1, 0)
331-
.map(this::getTextContentFromMetadataPart);
331+
// Search the info metadata row for text that looks like a view count.
332+
// YouTube uses 2 rows [author][views,date] for stream items outside channels
333+
// and 1 row in channels [views,date], so the views text could be in any part
334+
// of the info row.
335+
final int infoRowIndex = getInfoMetadataRowIndex();
336+
Optional<String> optTextContent = findMetadataPartInRow(infoRowIndex, text -> {
337+
final String lower = text.toLowerCase();
338+
return lower.matches(".*\\bviews?\\b.*") || lower.contains("watching")
339+
|| lower.contains("recommended") || lower.contains(NO_VIEWS_LOWERCASE);
340+
});
341+
342+
// Fallback: search all rows. Handles livestreams with only 1 metadata row
343+
// in search/related/kiosk contexts, where that single row contains views.
344+
if (optTextContent.isEmpty()) {
345+
optTextContent = findMetadataPartInAllRows(text -> {
346+
final String lower = text.toLowerCase();
347+
return lower.matches(".*\\bviews?\\b.*") || lower.contains("watching")
348+
|| lower.contains("recommended") || lower.contains(NO_VIEWS_LOWERCASE);
349+
});
350+
}
351+
352+
// Fallback to original position if heuristic didn't match
353+
if (optTextContent.isEmpty()) {
354+
optTextContent = metadataPart(infoRowIndex, 0)
355+
.map(this::getTextContentFromMetadataPart);
356+
}
357+
332358
// We could do this inline if the ParsingException would be a RuntimeException -.-
333359
if (optTextContent.isPresent()) {
334360
return getViewCountFromViewCountText(optTextContent.get());
@@ -410,14 +436,88 @@ private String getTextContentFromMetadataPart(final JsonObject metadataPart) {
410436
return metadataPart.getObject("text").getString("content");
411437
}
412438

439+
/**
440+
* Returns the index of the metadata row containing view count and date info.
441+
* YouTube uses 2 rows [author][views,date] for stream items outside channels
442+
* and 1 row in channels [views,date] (as they don't return uploader info).
443+
*/
444+
protected int getInfoMetadataRowIndex() {
445+
return 1;
446+
}
447+
448+
/**
449+
* Searches the metadata parts of a specific row for text matching the given predicate.
450+
* This handles variable part order (e.g. [views, date] vs [date, views]) within a row.
451+
*/
452+
private Optional<String> findMetadataPartInRow(final int rowIndex,
453+
@Nonnull final Predicate<String> predicate)
454+
throws ParsingException {
455+
if (cachedMetadataRows == null) {
456+
cachedMetadataRows = JsonUtils.getArray(lockupViewModel,
457+
"metadata.lockupMetadataViewModel.metadata"
458+
+ ".contentMetadataViewModel.metadataRows");
459+
}
460+
return cachedMetadataRows
461+
.streamAsJsonObjects()
462+
.skip(rowIndex)
463+
.limit(1)
464+
.flatMap(jsonObject -> jsonObject.getArray("metadataParts")
465+
.streamAsJsonObjects())
466+
.map(this::getTextContentFromMetadataPart)
467+
.filter(predicate)
468+
.findFirst();
469+
}
470+
471+
/**
472+
* Searches all metadata rows for text matching the given predicate.
473+
* Used as a fallback when the info row doesn't contain the expected data,
474+
* e.g. for livestreams with only 1 metadata row in search results.
475+
*/
476+
private Optional<String> findMetadataPartInAllRows(@Nonnull final Predicate<String> predicate)
477+
throws ParsingException {
478+
if (cachedMetadataRows == null) {
479+
cachedMetadataRows = JsonUtils.getArray(lockupViewModel,
480+
"metadata.lockupMetadataViewModel.metadata"
481+
+ ".contentMetadataViewModel.metadataRows");
482+
}
483+
return cachedMetadataRows
484+
.streamAsJsonObjects()
485+
.flatMap(jsonObject -> jsonObject.getArray("metadataParts")
486+
.streamAsJsonObjects())
487+
.map(this::getTextContentFromMetadataPart)
488+
.filter(predicate)
489+
.findFirst();
490+
}
491+
413492
private boolean isLive() throws ParsingException {
414493
return getStreamType() != StreamType.VIDEO_STREAM;
415494
}
416495

417496
private Optional<String> getDateText() throws ParsingException {
418497
if (cachedDateText == null) {
419-
cachedDateText = metadataPart(1, 1)
420-
.map(this::getTextContentFromMetadataPart);
498+
// YouTube uses 2 rows [author][views,date] for stream items outside channels
499+
// and 1 row in channels [views,date] (as they don't return uploader info in them),
500+
// so the date text could be in any part of the info row.
501+
final int infoRowIndex = getInfoMetadataRowIndex();
502+
cachedDateText = findMetadataPartInRow(infoRowIndex, text ->
503+
text.endsWith("ago") || text.contains(PREMIERES_TEXT));
504+
505+
// Fallback: search all rows. Handles livestreams with only 1 metadata row
506+
// in search/related/kiosk contexts, where that single row may contain the date.
507+
if (cachedDateText.isEmpty()) {
508+
cachedDateText = findMetadataPartInAllRows(text ->
509+
text.endsWith("ago") || text.contains(PREMIERES_TEXT));
510+
}
511+
512+
// Fallback to original positions if heuristic didn't match
513+
if (cachedDateText.isEmpty()) {
514+
cachedDateText = metadataPart(infoRowIndex, 1)
515+
.map(this::getTextContentFromMetadataPart);
516+
}
517+
if (cachedDateText.isEmpty()) {
518+
cachedDateText = metadataPart(0, 1)
519+
.map(this::getTextContentFromMetadataPart);
520+
}
421521
}
422522
return cachedDateText;
423523
}

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,8 @@ public void testVerified() throws Exception {
214214
@Test
215215
@Override
216216
public void testTabs() throws Exception {
217-
assertTabsContain(extractor().getTabs(), ChannelTabs.VIDEOS, ChannelTabs.PLAYLISTS);
217+
assertTabsContain(extractor().getTabs(), ChannelTabs.VIDEOS, ChannelTabs.SHORTS,
218+
ChannelTabs.PLAYLISTS);
218219
assertTrue(extractor().getTabs().stream()
219220
.filter(it -> ChannelTabs.VIDEOS.equals(it.getContentFilters().get(0)))
220221
.allMatch(ReadyChannelTabListLinkHandler.class::isInstance));

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamInfoItemTest.java

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,87 @@ void lockupViewModelNoDuration()
138138
);
139139
}
140140

141+
/**
142+
* Tests that the info row search correctly extracts date and view count
143+
* from the 1-row channel format where parts are in normal order: [views, date].
144+
*/
145+
@Test
146+
void lockupViewModelOneRowNormal()
147+
throws FileNotFoundException, JsonParserException {
148+
final var json = JsonParser.object().from(new FileInputStream(getMockPath(
149+
YoutubeStreamInfoItemTest.class, "lockupViewModelOneRowNormal") + ".json"));
150+
final var timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT);
151+
final var extractor = new YoutubeStreamInfoItemLockupExtractor(json, timeAgoParser) {
152+
// Channel tabs use 1-row format at index 0
153+
@Override
154+
protected int getInfoMetadataRowIndex() {
155+
return 0;
156+
}
157+
};
158+
assertAll(
159+
() -> assertEquals(StreamType.VIDEO_STREAM, extractor.getStreamType()),
160+
() -> assertEquals("Test Video One Row Normal", extractor.getName()),
161+
() -> assertEquals("2 hours ago", extractor.getTextualUploadDate()),
162+
() -> assertNotNull(extractor.getUploadDate()),
163+
() -> assertEquals(3600000, extractor.getViewCount()), // 3.6m views
164+
() -> assertEquals(630, extractor.getDuration()) // 10:30
165+
);
166+
}
167+
168+
/**
169+
* Tests that the info row search correctly extracts date and view count
170+
* from the 1-row channel format where parts are in reversed order: [date, views].
171+
*/
172+
@Test
173+
void lockupViewModelOneRowReversed()
174+
throws FileNotFoundException, JsonParserException {
175+
final var json = JsonParser.object().from(new FileInputStream(getMockPath(
176+
YoutubeStreamInfoItemTest.class, "lockupViewModelOneRowReversed") + ".json"));
177+
final var timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT);
178+
final var extractor = new YoutubeStreamInfoItemLockupExtractor(json, timeAgoParser) {
179+
// Channel tabs use 1-row format at index 0
180+
@Override
181+
protected int getInfoMetadataRowIndex() {
182+
return 0;
183+
}
184+
};
185+
assertAll(
186+
() -> assertEquals(StreamType.VIDEO_STREAM, extractor.getStreamType()),
187+
() -> assertEquals("Test Video One Row Reversed", extractor.getName()),
188+
() -> assertEquals("1 day ago", extractor.getTextualUploadDate()),
189+
() -> assertNotNull(extractor.getUploadDate()),
190+
() -> assertEquals(1200, extractor.getViewCount()), // 1.2K views
191+
() -> assertEquals(300, extractor.getDuration()) // 5:00
192+
);
193+
}
194+
195+
/**
196+
* Tests that the info row search handles 1-row format with only view count
197+
* (no date text present) - e.g. for livestreams with watching count only.
198+
*/
199+
@Test
200+
void lockupViewModelOneRowViewsOnly()
201+
throws FileNotFoundException, JsonParserException {
202+
final var json = JsonParser.object().from(new FileInputStream(getMockPath(
203+
YoutubeStreamInfoItemTest.class, "lockupViewModelOneRowViewsOnly") + ".json"));
204+
final var timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT);
205+
final var extractor = new YoutubeStreamInfoItemLockupExtractor(json, timeAgoParser) {
206+
// Channel tabs use 1-row format at index 0
207+
@Override
208+
protected int getInfoMetadataRowIndex() {
209+
return 0;
210+
}
211+
};
212+
assertAll(
213+
() -> assertEquals(StreamType.LIVE_STREAM, extractor.getStreamType()),
214+
() -> assertEquals("Test Video One Row Views Only", extractor.getName()),
215+
() -> assertNull(extractor.getTextualUploadDate()),
216+
() -> assertNull(extractor.getUploadDate()),
217+
() -> assertEquals(500, extractor.getViewCount()), // 500 watching
218+
() -> assertEquals(-1, extractor.getDuration())
219+
);
220+
}
221+
141222
@Test
142223
void emptyTitle() throws FileNotFoundException, JsonParserException {
143224
final var json = JsonParser.object().from(new FileInputStream(getMockPath(

0 commit comments

Comments
 (0)