Skip to content

Commit bcff284

Browse files
committed
dump_copy_diagnostics: open Source code tab before scanning/clicking copy buttons
1 parent 07f082e commit bcff284

1 file changed

Lines changed: 168 additions & 7 deletions

File tree

tv_downloader_enhanced.py

Lines changed: 168 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -571,9 +571,29 @@ async def extract_pine_source(self, script_url: str) -> dict:
571571
result['error'] = 'not open-source'
572572
return result
573573

574-
# Strategy 1: Try copy/clipboard button fallback first (some pages expose raw source via copy button)
574+
# Strategy 1: Try copy/clipboard fallback first — but ensure Source Code tab is opened (if present)
575575
source_code = ''
576576
try:
577+
# Try to click the Source code tab first to reveal code & copy icon
578+
try:
579+
tab_selectors = ['[role="tab"]:has-text("Source code")','button:has-text("Source code")','div:has-text("Source code"):not(:has(*))','button:has-text("Source")']
580+
for s in tab_selectors:
581+
try:
582+
t = self.page.locator(s)
583+
if await t.count() > 0 and await t.first.is_visible():
584+
try:
585+
await t.first.click()
586+
await self.page.wait_for_timeout(600)
587+
if getattr(self, 'debug_pages', False):
588+
print(f" [debug] Clicked Source code tab using selector: {s}")
589+
break
590+
except Exception:
591+
continue
592+
except Exception:
593+
continue
594+
except Exception:
595+
pass
596+
577597
source_code = await self._try_copy_button_extraction()
578598
if source_code and getattr(self, 'debug_pages', False):
579599
print(f" [debug] Extracted source via copy-button fallback for {script_url} (chars: {len(source_code)})")
@@ -811,7 +831,14 @@ async def _try_embedded_extraction(self) -> str:
811831
return ''
812832

813833
async def _try_copy_button_extraction(self) -> str:
814-
"""Try extracting source code from copy-to-clipboard buttons and nearby elements."""
834+
"""Try extracting source code from copy-to-clipboard buttons and nearby elements.
835+
836+
This attempts (in order):
837+
- read common data-clipboard attributes
838+
- scan nearby code/textarea elements
839+
- attach a 'copy' event listener, click the copy button, and capture clipboard payload
840+
- as fallback try navigator.clipboard.readText()
841+
"""
815842
try:
816843
return await self.page.evaluate(r'''() => {
817844
function looksLikePine(t) {
@@ -842,6 +869,58 @@ async def _try_copy_button_extraction(self) -> str:
842869
'.tv-copy'
843870
];
844871
872+
// Helper to attempt click + capture via copy event
873+
async function tryClickAndCapture(b) {
874+
try {
875+
// install listener
876+
window.__copied_source__ = '';
877+
const handler = (e) => {
878+
try {
879+
const txt = (e.clipboardData && e.clipboardData.getData('text/plain')) || '';
880+
if (txt) window.__copied_source__ = txt;
881+
} catch(err) {}
882+
};
883+
document.addEventListener('copy', handler, {once: true});
884+
885+
// monkeypatch navigator.clipboard.writeText to capture direct writes
886+
let origWrite = null;
887+
try {
888+
if (navigator.clipboard && navigator.clipboard.writeText) {
889+
origWrite = navigator.clipboard.writeText;
890+
navigator.clipboard.writeText = (s) => { window.__copied_source__ = (s || ''); return Promise.resolve(); };
891+
} else {
892+
navigator.clipboard = { writeText: (s) => { window.__copied_source__ = (s || ''); return Promise.resolve(); } };
893+
}
894+
} catch(e) {}
895+
896+
try { b.click(); } catch(e) {}
897+
898+
// wait briefly for copy to happen (up to 2s)
899+
const start = Date.now();
900+
while ((Date.now() - start) < 2000) {
901+
await new Promise(r => setTimeout(r, 150));
902+
if (window.__copied_source__) break;
903+
}
904+
905+
// restore original writeText
906+
try { if (origWrite && navigator.clipboard) navigator.clipboard.writeText = origWrite; } catch(e) {}
907+
908+
// remove handler if still present
909+
try { document.removeEventListener('copy', handler); } catch(e) {}
910+
911+
if (window.__copied_source__ && looksLikePine(window.__copied_source__)) return window.__copied_source__;
912+
913+
// Fallback: try navigator.clipboard.readText()
914+
try {
915+
if (navigator.clipboard && navigator.clipboard.readText) {
916+
const cb = await navigator.clipboard.readText();
917+
if (cb && looksLikePine(cb)) return cb;
918+
}
919+
} catch(e) {}
920+
} catch(e) {}
921+
return '';
922+
}
923+
845924
for (const sel of btnSelectors) {
846925
try {
847926
const b = document.querySelector(sel);
@@ -859,19 +938,20 @@ async def _try_copy_button_extraction(self) -> str:
859938
if (looksLikePine(txt)) return txt;
860939
}
861940
862-
// Try to click and shortly wait for injected inputs/textareas
863-
try { b.click(); } catch(e) {}
941+
// Click and try to capture via copy event
942+
const captured = await tryClickAndCapture(b);
943+
if (captured && looksLikePine(captured)) return captured;
864944
865-
// After clicking, look for temporary elements or inputs with content
945+
// After clicking also check any temporary inputs/textarea
866946
const inputs = Array.from(document.querySelectorAll('textarea, input'));
867947
for (const inp of inputs) {
868948
const v = inp.value || inp.textContent || '';
869949
if (looksLikePine(v)) return v;
870950
}
871951
872952
// Check selection
873-
const sel = (window.getSelection && window.getSelection().toString()) || '';
874-
if (looksLikePine(sel)) return sel;
953+
const seltxt = (window.getSelection && window.getSelection().toString()) || '';
954+
if (looksLikePine(seltxt)) return seltxt;
875955
} catch(e) {}
876956
}
877957
@@ -880,6 +960,57 @@ async def _try_copy_button_extraction(self) -> str:
880960
except Exception:
881961
return ''
882962

963+
async def dump_copy_diagnostics(self, url: str):
    """Visit a single script URL and print diagnostics for copy-button capture attempts.

    The page is loaded, the "Source code" tab is opened when one of the known
    selectors matches a visible element, and then an in-page script:

    - records the first element carrying each known ``data-clipboard*`` attribute,
    - records every element matching the copy-button selectors together with a
      sample of the nearest code-like element,
    - clicks each candidate button and reports what it tried to copy.

    A click is observed through three channels: the DOM ``copy`` event, a
    temporary replacement of ``navigator.clipboard.writeText`` (the same capture
    technique used by ``_try_copy_button_extraction``), and finally
    ``navigator.clipboard.readText()`` when nothing else was captured.

    Args:
        url: Address of the script page to inspect.

    Returns:
        None. Results are printed as ``[ATTR]``, ``[BUTTON]`` and ``[CAPTURE n]``
        lines. ``self.cleanup()`` is always awaited once ``self.setup()`` has
        completed.
    """
    await self.setup()
    try:
        print(f"Diagnostics: visiting {url}")
        await self.page.goto(url, wait_until='networkidle', timeout=60000)
        await self.page.wait_for_timeout(800)

        # Open the source tab first so a copy button rendered inside it becomes visible.
        tab_selectors = [
            '[role="tab"]:has-text("Source code")',
            'button:has-text("Source code")',
            'div:has-text("Source code"):not(:has(*))',
            'button:has-text("Source")',
        ]
        for s in tab_selectors:
            try:
                t = self.page.locator(s)
                if await t.count() > 0 and await t.first.is_visible():
                    await t.first.click()
                    await self.page.wait_for_timeout(800)
                    print(' [debug] Clicked Source code tab for diagnostics')
                    break
            except Exception as exc:
                # Best effort: a failing selector must not abort the run, but in a
                # diagnostic tool the failure itself is worth reporting.
                print(f" [debug] Source code tab selector {s!r} failed: {exc}")
                continue

        data = await self.page.evaluate(r'''async () => {
            const attrs = ['data-clipboard-text','data-clipboard','data-copy','data-clipboard-text-original','data-clipboard-text-original-value'];
            const btnSelectors = ['button[aria-label*="copy"]', 'button[title*="Copy"]','button[aria-label*="Copy to clipboard"]','.copy-to-clipboard','[data-qa-id*="copy"]','[class*="copy"]','.tv-copy'];
            const found = {attrs: [], buttons: []};

            // First element carrying each known clipboard attribute.
            for (const a of attrs) {
                const el = document.querySelector('[' + a + ']');
                if (el) found.attrs.push({attr: a, sample: (el.getAttribute(a) || '').slice(0, 200)});
            }

            // Every candidate button, with a sample of the closest code-like element.
            for (const sel of btnSelectors) {
                const nodes = Array.from(document.querySelectorAll(sel));
                nodes.forEach((b, i) => {
                    const dialog = b.closest('[role="dialog"]') || b.closest('div') || document.body;
                    const code = dialog.querySelector('pre, code, textarea, [class*="code"], [class*="source"]');
                    found.buttons.push({
                        selector: sel,
                        idx: i,
                        text: (b.innerText || '').slice(0, 120),
                        nearby: (code ? (code.value || code.textContent || '').slice(0, 200) : '')
                    });
                });
            }

            // Click one button and capture whatever it tries to put on the clipboard.
            async function tryClick(b) {
                window.__copied__ = '';
                const handler = (e) => {
                    try {
                        const txt = (e.clipboardData && e.clipboardData.getData('text/plain')) || '';
                        // Never overwrite an already captured payload with an empty one.
                        if (txt) window.__copied__ = txt;
                    } catch (err) {}
                };
                document.addEventListener('copy', handler, {once: true});

                // Temporarily replace navigator.clipboard.writeText so a direct
                // clipboard write is captured as well as a 'copy' event.
                const clip = navigator.clipboard;
                let patched = false;
                let hadOwn = false;
                let origWrite = null;
                try {
                    if (clip && typeof clip.writeText === 'function') {
                        hadOwn = Object.prototype.hasOwnProperty.call(clip, 'writeText');
                        origWrite = clip.writeText;
                        clip.writeText = (s) => { window.__copied__ = String(s || ''); return Promise.resolve(); };
                        patched = true;
                    }
                } catch (err) {}

                try { b.click(); } catch (err) {}

                // Poll for up to 2s for a captured payload.
                const start = Date.now();
                while ((Date.now() - start) < 2000) {
                    await new Promise(r => setTimeout(r, 150));
                    if (window.__copied__) break;
                }

                // Put writeText back exactly as it was found.
                try {
                    if (patched) {
                        if (hadOwn) clip.writeText = origWrite;
                        else delete clip.writeText;
                    }
                } catch (err) {}
                try { document.removeEventListener('copy', handler); } catch (err) {}

                let cb = window.__copied__ || '';
                try {
                    if (!cb && navigator.clipboard && navigator.clipboard.readText) cb = await navigator.clipboard.readText();
                } catch (err) {}
                return String(cb || '').slice(0, 400);
            }

            const caps = [];
            const btns = Array.from(document.querySelectorAll(btnSelectors.join(',')));
            for (const b of btns) { caps.push(await tryClick(b)); }
            return {found: found, captures: caps};
        }''')

        # print results
        for a in data.get('found', {}).get('attrs', []):
            print(f"[ATTR] {a['attr']} sample: {a['sample']}")
        for b in data.get('found', {}).get('buttons', []):
            print(f"[BUTTON] {b['selector']} idx={b['idx']} text={b['text']!r} nearby_sample={b['nearby']!r}")
        caps = data.get('captures', [])
        for i, c in enumerate(caps):
            print(f"[CAPTURE {i}] {c!r}")
    finally:
        await self.cleanup()
1013+
8831014
def _normalize_source(self, source: str) -> str:
8841015
"""Normalize source text: whitespace, unicode normalization, and attempt to fix common mojibake."""
8851016
if not isinstance(source, str):
@@ -906,6 +1037,29 @@ def weird_count(s):
9061037
# Remove stray control characters except tabs and newlines
9071038
src = ''.join(ch if ch >= ' ' or ch in '\t\n' else ' ' for ch in src)
9081039

1040+
# Common mojibake replacements (cover frequent sequences from UTF-8->Latin1 mishandling)
1041+
mojibake_map = {
1042+
'•': '•',
1043+
'—': '—',
1044+
'…': '…',
1045+
'™': '™',
1046+
'©': '©',
1047+
' ': ' ',
1048+
'é': 'é',
1049+
'è': 'è',
1050+
'â': 'â',
1051+
'ô': 'ô',
1052+
'ë': 'ë',
1053+
'═': '─',
1054+
'â•': '-',
1055+
'⚡': '!',
1056+
'⚙': '•',
1057+
'️': '',
1058+
}
1059+
for k, v in mojibake_map.items():
1060+
if k in src:
1061+
src = src.replace(k, v)
1062+
9091063
# Trim trailing whitespace on lines
9101064
src = '\n'.join(line.rstrip() for line in src.splitlines())
9111065

@@ -1304,6 +1458,7 @@ async def main():
13041458
parser.add_argument('--no-resume', action='store_true', help='Start fresh (ignore progress)')
13051459
parser.add_argument('--max-pages', '-p', type=int, default=20, help='Maximum pages to scan or visit')
13061460
parser.add_argument('--debug-pages', action='store_true', help='Verbose page visit logging (debug)')
1461+
parser.add_argument('--dump-copy', action='store_true', help='Diagnostic: inspect copy-button(s) and captured clipboard payload on a single script URL')
13071462
parser.add_argument('--status', action='store_true', help='Show status of output directory (progress files, existing .pine files) and exit')
13081463

13091464
args = parser.parse_args()
@@ -1320,6 +1475,12 @@ async def main():
13201475
if args.status:
13211476
scraper.print_status()
13221477
return
1478+
1479+
if args.dump_copy:
1480+
if not args.url or '/script/' not in args.url:
1481+
parser.error('--dump-copy requires a single script URL (contains "/script/")')
1482+
await scraper.dump_copy_diagnostics(args.url)
1483+
return
13231484

13241485
await scraper.download_all(
13251486
base_url=args.url,

0 commit comments

Comments
 (0)