@@ -571,9 +571,29 @@ async def extract_pine_source(self, script_url: str) -> dict:
571571 result ['error' ] = 'not open-source'
572572 return result
573573
574- # Strategy 1: Try copy/clipboard button fallback first (some pages expose raw source via copy button )
574+ # Strategy 1: Try copy/clipboard fallback first — but ensure Source Code tab is opened (if present )
575575 source_code = ''
576576 try :
577+ # Try to click the Source code tab first to reveal code & copy icon
578+ try :
579+ tab_selectors = ['[role="tab"]:has-text("Source code")' ,'button:has-text("Source code")' ,'div:has-text("Source code"):not(:has(*))' ,'button:has-text("Source")' ]
580+ for s in tab_selectors :
581+ try :
582+ t = self .page .locator (s )
583+ if await t .count () > 0 and await t .first .is_visible ():
584+ try :
585+ await t .first .click ()
586+ await self .page .wait_for_timeout (600 )
587+ if getattr (self , 'debug_pages' , False ):
588+ print (f" [debug] Clicked Source code tab using selector: { s } " )
589+ break
590+ except Exception :
591+ continue
592+ except Exception :
593+ continue
594+ except Exception :
595+ pass
596+
577597 source_code = await self ._try_copy_button_extraction ()
578598 if source_code and getattr (self , 'debug_pages' , False ):
579599 print (f" [debug] Extracted source via copy-button fallback for { script_url } (chars: { len (source_code )} )" )
@@ -811,7 +831,14 @@ async def _try_embedded_extraction(self) -> str:
811831 return ''
812832
813833 async def _try_copy_button_extraction (self ) -> str :
814- """Try extracting source code from copy-to-clipboard buttons and nearby elements."""
834+ """Try extracting source code from copy-to-clipboard buttons and nearby elements.
835+
836+ This attempts (in order):
837+ - read common data-clipboard attributes
838+ - scan nearby code/textarea elements
839+ - attach a 'copy' event listener, click the copy button, and capture clipboard payload
840+ - as fallback try navigator.clipboard.readText()
841+ """
815842 try :
816843 return await self .page .evaluate (r'''() => {
817844 function looksLikePine(t) {
@@ -842,6 +869,58 @@ async def _try_copy_button_extraction(self) -> str:
842869 '.tv-copy'
843870 ];
844871
872+ // Helper to attempt click + capture via copy event
873+ async function tryClickAndCapture(b) {
874+ try {
875+ // install listener
876+ window.__copied_source__ = '';
877+ const handler = (e) => {
878+ try {
879+ const txt = (e.clipboardData && e.clipboardData.getData('text/plain')) || '';
880+ if (txt) window.__copied_source__ = txt;
881+ } catch(err) {}
882+ };
883+ document.addEventListener('copy', handler, {once: true});
884+
885+ // monkeypatch navigator.clipboard.writeText to capture direct writes
886+ let origWrite = null;
887+ try {
888+ if (navigator.clipboard && navigator.clipboard.writeText) {
889+ origWrite = navigator.clipboard.writeText;
890+ navigator.clipboard.writeText = (s) => { window.__copied_source__ = (s || ''); return Promise.resolve(); };
891+ } else {
892+ navigator.clipboard = { writeText: (s) => { window.__copied_source__ = (s || ''); return Promise.resolve(); } };
893+ }
894+ } catch(e) {}
895+
896+ try { b.click(); } catch(e) {}
897+
898+ // wait briefly for copy to happen (up to 2s)
899+ const start = Date.now();
900+ while ((Date.now() - start) < 2000) {
901+ await new Promise(r => setTimeout(r, 150));
902+ if (window.__copied_source__) break;
903+ }
904+
905+ // restore original writeText
906+ try { if (origWrite && navigator.clipboard) navigator.clipboard.writeText = origWrite; } catch(e) {}
907+
908+ // remove handler if still present
909+ try { document.removeEventListener('copy', handler); } catch(e) {}
910+
911+ if (window.__copied_source__ && looksLikePine(window.__copied_source__)) return window.__copied_source__;
912+
913+ // Fallback: try navigator.clipboard.readText()
914+ try {
915+ if (navigator.clipboard && navigator.clipboard.readText) {
916+ const cb = await navigator.clipboard.readText();
917+ if (cb && looksLikePine(cb)) return cb;
918+ }
919+ } catch(e) {}
920+ } catch(e) {}
921+ return '';
922+ }
923+
845924 for (const sel of btnSelectors) {
846925 try {
847926 const b = document.querySelector(sel);
@@ -859,19 +938,20 @@ async def _try_copy_button_extraction(self) -> str:
859938 if (looksLikePine(txt)) return txt;
860939 }
861940
862- // Try to click and shortly wait for injected inputs/textareas
863- try { b.click(); } catch(e) {}
941+ // Click and try to capture via copy event
942+ const captured = await tryClickAndCapture(b);
943+ if (captured && looksLikePine(captured)) return captured;
864944
865- // After clicking, look for temporary elements or inputs with content
945+ // After clicking also check any temporary inputs/textarea
866946 const inputs = Array.from(document.querySelectorAll('textarea, input'));
867947 for (const inp of inputs) {
868948 const v = inp.value || inp.textContent || '';
869949 if (looksLikePine(v)) return v;
870950 }
871951
872952 // Check selection
873- const sel = (window.getSelection && window.getSelection().toString()) || '';
874- if (looksLikePine(sel )) return sel ;
953+ const seltxt = (window.getSelection && window.getSelection().toString()) || '';
954+ if (looksLikePine(seltxt )) return seltxt ;
875955 } catch(e) {}
876956 }
877957
@@ -880,6 +960,57 @@ async def _try_copy_button_extraction(self) -> str:
880960 except Exception :
881961 return ''
882962
async def dump_copy_diagnostics(self, url: str) -> None:
    """Visit a single script URL and print diagnostics for copy-button capture attempts.

    Steps performed:
      1. Call ``self.setup()`` and navigate ``self.page`` to ``url``.
      2. Best-effort click on a "Source code" tab so that a copy button
         rendered inside the source view becomes visible.
      3. In the page, list known clipboard attributes, list candidate copy
         buttons together with a sample of nearby code-like text, then click
         every candidate button and record what it tried to copy.
      4. Print the findings as ``[ATTR]``, ``[BUTTON]`` and ``[CAPTURE n]`` lines.

    ``self.cleanup()`` is always awaited once ``setup()`` has returned, even
    when navigation or evaluation raises.

    Args:
        url: Address of the single script page to inspect.
    """
    await self.setup()
    try:
        print(f"Diagnostics: visiting {url} ")
        await self.page.goto(url, wait_until='networkidle', timeout=60000)
        await self.page.wait_for_timeout(800)

        # Try to open the source tab so a copy button inside it becomes visible.
        # Deliberately best-effort: a selector that fails to resolve or click is
        # skipped and the next one is tried; diagnostics continue regardless.
        tab_selectors = [
            '[role="tab"]:has-text("Source code")',
            'button:has-text("Source code")',
            'div:has-text("Source code"):not(:has(*))',
            'button:has-text("Source")',
        ]
        for selector in tab_selectors:
            try:
                tab = self.page.locator(selector)
                if await tab.count() > 0 and await tab.first.is_visible():
                    await tab.first.click()
                    await self.page.wait_for_timeout(800)
                    print(' [debug] Clicked Source code tab for diagnostics')
                    break
            except Exception:
                continue

        data = await self.page.evaluate(r'''async () => {
            const attrs = ['data-clipboard-text','data-clipboard','data-copy','data-clipboard-text-original','data-clipboard-text-original-value'];
            const btnSelectors = ['button[aria-label*="copy"]', 'button[title*="Copy"]','button[aria-label*="Copy to clipboard"]','.copy-to-clipboard','[data-qa-id*="copy"]','[class*="copy"]','.tv-copy'];
            const found = {attrs: [], buttons: []};

            // Static inspection: clipboard-style attributes present in the DOM.
            for (const a of attrs) {
                const el = document.querySelector('[' + a + ']');
                if (el) found.attrs.push({attr: a, sample: (el.getAttribute(a) || '').slice(0, 200)});
            }

            // Static inspection: candidate buttons plus a sample of nearby code-like text.
            for (const sel of btnSelectors) {
                const nodes = Array.from(document.querySelectorAll(sel));
                nodes.forEach((b, i) => {
                    const dialog = b.closest('[role="dialog"]') || b.closest('div') || document.body;
                    const code = dialog.querySelector('pre, code, textarea, [class*="code"], [class*="source"]');
                    found.buttons.push({
                        selector: sel,
                        idx: i,
                        text: (b.innerText || '').slice(0, 120),
                        nearby: (code ? (code.value || code.textContent || '').slice(0, 200) : '')
                    });
                });
            }

            // Click one button and capture whatever it tries to copy.
            async function tryClick(b) {
                window.__copied__ = '';
                const handler = (e) => {
                    try { window.__copied__ = (e.clipboardData && e.clipboardData.getData('text/plain')) || ''; } catch {}
                };
                document.addEventListener('copy', handler, {once: true});

                // Pages that call navigator.clipboard.writeText() directly never fire a
                // "copy" event, so intercept that call as well. This keeps the diagnostic
                // in line with the interception done by the extraction code path.
                const clip = navigator.clipboard;
                let patched = false;
                let hadOwn = false;
                let origWrite;
                try {
                    if (clip && typeof clip.writeText === 'function') {
                        hadOwn = Object.prototype.hasOwnProperty.call(clip, 'writeText');
                        origWrite = clip.writeText;
                        clip.writeText = (s) => { window.__copied__ = String(s == null ? '' : s); return Promise.resolve(); };
                        patched = true;
                    }
                } catch {}

                try {
                    try { b.click(); } catch {}
                    // Wait briefly (up to 2s) for the copy to happen.
                    const start = Date.now();
                    while ((Date.now() - start) < 2000) {
                        await new Promise(r => setTimeout(r, 150));
                        if (window.__copied__) break;
                    }
                } finally {
                    // Always undo the interception and drop the listener.
                    if (patched) {
                        try { if (hadOwn) clip.writeText = origWrite; else delete clip.writeText; } catch {}
                    }
                    try { document.removeEventListener('copy', handler); } catch {}
                }

                let cb = window.__copied__ || '';
                // Fallback: read the clipboard itself. NOTE(review): this returns whatever the
                // clipboard currently holds, which may stem from an earlier button or from
                // outside the page - treat such captures with care.
                try { if (!cb && navigator.clipboard && navigator.clipboard.readText) cb = await navigator.clipboard.readText(); } catch {}
                return String(cb).slice(0, 400);
            }

            const caps = [];
            const btns = Array.from(document.querySelectorAll(btnSelectors.join(',')));
            for (const b of btns) { caps.push(await tryClick(b)); }
            return {found: found, captures: caps};
        }''') or {}

        # Print results.
        found = data.get('found', {})
        for a in found.get('attrs', []):
            print(f"[ATTR] {a['attr']} sample: {a['sample']} ")
        for b in found.get('buttons', []):
            print(f"[BUTTON] {b['selector']} idx={b['idx']} text={b['text']!r} nearby_sample={b['nearby']!r} ")
        for i, c in enumerate(data.get('captures', [])):
            print(f"[CAPTURE {i} ] {c!r} ")
    finally:
        await self.cleanup()
1013+
8831014 def _normalize_source (self , source : str ) -> str :
8841015 """Normalize source text: whitespace, unicode normalization, and attempt to fix common mojibake."""
8851016 if not isinstance (source , str ):
@@ -906,6 +1037,29 @@ def weird_count(s):
9061037 # Remove stray control characters except tabs and newlines
9071038 src = '' .join (ch if ch >= ' ' or ch in '\t \n ' else ' ' for ch in src )
9081039
1040+ # Common mojibake replacements (cover frequent sequences from UTF-8->Latin1 mishandling)
1041+ mojibake_map = {
1042+ 'â¢' : '•' ,
1043+ 'â' : '—' ,
1044+ 'â¦' : '…' ,
1045+ 'â¢' : '™' ,
1046+ '©' : '©' ,
1047+ 'Â ' : ' ' ,
1048+ 'é' : 'é' ,
1049+ 'è' : 'è' ,
1050+ 'â' : 'â' ,
1051+ 'ô' : 'ô' ,
1052+ 'ë' : 'ë' ,
1053+ 'â' : '─' ,
1054+ 'â' : '-' ,
1055+ 'â¡' : '!' ,
1056+ 'â' : '•' ,
1057+ 'ï¸' : '' ,
1058+ }
1059+ for k , v in mojibake_map .items ():
1060+ if k in src :
1061+ src = src .replace (k , v )
1062+
9091063 # Trim trailing whitespace on lines
9101064 src = '\n ' .join (line .rstrip () for line in src .splitlines ())
9111065
@@ -1304,6 +1458,7 @@ async def main():
13041458 parser .add_argument ('--no-resume' , action = 'store_true' , help = 'Start fresh (ignore progress)' )
13051459 parser .add_argument ('--max-pages' , '-p' , type = int , default = 20 , help = 'Maximum pages to scan or visit' )
13061460 parser .add_argument ('--debug-pages' , action = 'store_true' , help = 'Verbose page visit logging (debug)' )
1461+ parser .add_argument ('--dump-copy' , action = 'store_true' , help = 'Diagnostic: inspect copy-button(s) and captured clipboard payload on a single script URL' )
13071462 parser .add_argument ('--status' , action = 'store_true' , help = 'Show status of output directory (progress files, existing .pine files) and exit' )
13081463
13091464 args = parser .parse_args ()
@@ -1320,6 +1475,12 @@ async def main():
13201475 if args .status :
13211476 scraper .print_status ()
13221477 return
1478+
1479+ if args .dump_copy :
1480+ if not args .url or '/script/' not in args .url :
1481+ parser .error ('--dump-copy requires a single script URL (contains "/script/")' )
1482+ await scraper .dump_copy_diagnostics (args .url )
1483+ return
13231484
13241485 await scraper .download_all (
13251486 base_url = args .url ,
0 commit comments