oil-agent/oil_agent.py at main · laurentvv/oil-agent · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
Oil Market Monitoring Agent
============================
smolagents 1.24.0 + LiteLLM + llama.cpp (Qwen3.5-9B)
Surveille les événements géopolitiques et industriels pouvant faire rebondir le cours du pétrole.
Envoie des alertes email via relais SMTP (Postfix local).
"""

import json
import smtplib
import hashlib
import logging
import subprocess
import time
import requests
import atexit
import sys
import io
from datetime import datetime
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from pathlib import Path

from smolagents import (
    CodeAgent,
    LiteLLMModel,
    DuckDuckGoSearchTool,
    VisitWebpageTool,
    Tool,
)

import dspy
from pydantic import BaseModel, Field, field_validator
from typing import List, Literal, Optional

# ─────────────────────────────────────────────
# Context Management
# ─────────────────────────────────────────────
from context_management import (
    HistoryCompressor,
    SlidingWindowManager,
    MemoryCleaner,
    AgentState,
    build_optimized_prompt_context,
)

# ─────────────────────────────────────────────
# DSPy Configuration & Models
# ─────────────────────────────────────────────

# Modèle Pydantic pour la sortie structurée
class OilEvent(BaseModel):
    """Structure d'un événement pétrolier."""
    # Pydantic schema describing a single oil-market event.
    # NOTE(review): the docstring above is deliberately left untouched —
    # pydantic is understood to expose a model docstring as its JSON-schema
    # description, which may end up in the LLM prompt; confirm before rewording.

    # Required unique identifier (the description gives "iran_hormuz_20240311" as an example).
    id: str = Field(..., description="ID unique (ex: iran_hormuz_20240311)")
    # Required category, restricted to this closed set of literals.
    category: Literal["Iran", "Refinery", "OPEC", "Gas", "Shipping", "Geopolitical"]
    # Required headline of the event.
    title: str
    # Required integer score, constrained to the inclusive range 0..10.
    impact_score: int = Field(..., ge=0, le=10)
    # Optional certainty in [0.0, 1.0]; defaults to 0.7 when omitted.
    certainty_score: Optional[float] = Field(0.7, ge=0.0, le=1.0, description="Niveau de certitude (0.0-1.0)")
    # Optional urgency label; defaults to "Recent" when omitted.
    urgency: Optional[Literal["Breaking", "Recent", "Developing", "Background"]] = Field("Recent", description="Urgence de l'événement")
    # Required free-text summary.
    summary: str
    # Required free-text price-impact estimate (the description gives "+$2-4/barrel" as an example).
    price_impact: str = Field(..., description="Ex: +$2-4/barrel")
    # Required hint about where the information came from.
    source_hint: str
    # Optional publication date kept as a plain string; defaults to None.
    publication_date: Optional[str] = Field(None, description="Date de publication si disponible")

class OilEventsResponse(BaseModel):
    """Structure de réponse avec la liste des événements."""
    # Response envelope: a list of OilEvent items plus one global confidence value.
    # NOTE(review): docstring left untouched because pydantic may surface it in
    # the generated JSON schema — confirm before rewording.

    # Events carried by the response (required).
    events: List[OilEvent]
    # Required float. The 0.0-1.0 range is stated in the description only;
    # no ge/le bound is enforced by this field.
    confidence_score: float = Field(..., description="Score de confiance global (0.0-1.0)")

# Signature DSPy
class OilEventSignature(dspy.Signature):
    """Extract structured oil events from intelligence. ALL fields required per event."""
    # NOTE(review): DSPy is understood to use a signature's docstring as the
    # task instructions sent to the model, so the wording above is kept
    # byte-identical — confirm before editing it.

    # --- Inputs ---------------------------------------------------------
    # Current date, formatted YYYY-MM-DD according to the field description.
    current_date: str = dspy.InputField(desc="Date (YYYY-MM-DD)")
    # Timestamp string; its exact format is not fixed in this block.
    current_datetime: str = dspy.InputField(desc="Timestamp")
    # Alert threshold, described as a value between 0 and 10.
    alert_threshold: int = dspy.InputField(desc="Alert threshold (0-10)")
    # Names of the news sources to prioritise.
    news_sources: list[str] = dspy.InputField(desc="Priority news sources")
    # Raw text gathered upstream that the model must analyse.
    raw_intelligence: str = dspy.InputField(desc="Collected intelligence data")

    # --- Outputs --------------------------------------------------------
    # Extracted events, typed with the OilEvent pydantic model.
    events: List[OilEvent] = dspy.OutputField(desc="Oil events with all required fields")
    # Overall confidence, described as a value between 0.0 and 1.0.
    confidence_score: float = dspy.OutputField(desc="Global confidence (0.0-1.0)")


# Module avec Chain of Thought
class OilEventAnalyzer(dspy.Module):
    """Oil-event analyzer that wraps a chain-of-thought predictor."""

    def __init__(self):
        super().__init__()
        # Predictor built from OilEventSignature through dspy.ChainOfThought.
        self.analyze = dspy.ChainOfThought(OilEventSignature)

    def forward(self, **kwargs):
        """Pass every keyword argument to the predictor and return its prediction."""
        return self.analyze(**kwargs)

def reduce_intelligence_for_dspy(raw_intelligence: str, max_chars: int) -> str:
    """
    Shrink the raw intelligence text before it is handed to DSPy.

    Three passes are applied in order:
    1. keep section headers, summary lines and the first lines of the text;
    2. drop consecutive duplicate lines and cap runs of blank lines at two;
    3. cut the result at ``max_chars`` characters and append a truncation notice.

    Args:
        raw_intelligence: Raw collected intelligence.
        max_chars: Number of characters kept before the truncation notice.

    Returns:
        The reduced text. When truncation happens, the notice is appended
        after the ``max_chars`` kept characters.
    """
    section_prefixes = ('===', '---')
    summary_markers = ('🔍', '📊', '✅')

    # Pass 1: headers and marker lines are always kept; any other line is
    # kept only while fewer than 50 lines have been collected so far.
    kept = []
    for candidate in raw_intelligence.split('\n'):
        is_header = candidate.strip().startswith(section_prefixes)
        has_marker = any(marker in candidate for marker in summary_markers)
        if is_header or has_marker or len(kept) < 50:
            kept.append(candidate)

    # Pass 2: remove consecutive duplicates (not all duplicates) and limit
    # consecutive blank lines to two. Blank lines do not reset the
    # "previous line" used for the duplicate comparison.
    deduped = []
    last_text = None
    blank_run = 0
    for candidate in kept:
        text = candidate.strip()
        if text:
            blank_run = 0
            if text != last_text:
                deduped.append(candidate)
                last_text = text
            continue
        blank_run += 1
        if blank_run <= 2:
            deduped.append(candidate)

    # Pass 3: enforce the size limit, flagging the cut explicitly.
    reduced_intelligence = '\n'.join(deduped)
    if len(reduced_intelligence) > max_chars:
        reduced_intelligence = (
            f"{reduced_intelligence[:max_chars]}"
            f"\n\n[... Intelligence tronquée à {max_chars} caractères ...]"
        )

    log.info(f"📦 Intelligence réduite : {len(raw_intelligence)} → {len(reduced_intelligence)} caractères")

    return reduced_intelligence


def validate_and_fix_events(events: list, current_date: str) -> list:
    """Validate and clean the events produced by the LLM.

    Each event is converted to a plain dict (dicts are shallow-copied so the
    caller's objects are never mutated; objects exposing ``model_dump`` are
    dumped). Events without a title or category are dropped, unknown
    categories are mapped to a valid one by keyword, and missing fields
    receive default values.

    Defaults are applied when a key is absent *or* holds ``None``: a dumped
    pydantic model always contains every key, so checking only for absence
    would let ``None`` values through. ``urgency`` and ``publication_date``
    are additionally defaulted when empty, and an empty ``id`` is regenerated.

    Args:
        events: Iterable of dicts or pydantic-like objects.
        current_date: Date string used as fallback ``publication_date``.

    Returns:
        A list of cleaned event dicts. Events that raise during validation
        are logged and skipped.
    """
    valid_categories = ("Iran", "Refinery", "OPEC", "Gas", "Shipping", "Geopolitical")

    # Ordered keyword rules used to repair an unknown category: first match wins.
    category_rules = (
        (("iran", "hormuz", "persian"), "Iran"),
        (("refinery", "plant"), "Refinery"),
        (("opec", "production", "supply"), "OPEC"),
        (("gas", "lng"), "Gas"),
        (("shipping", "tanker", "red sea", "suez"), "Shipping"),
        (("geopolitical", "war", "conflict"), "Geopolitical"),
    )

    # Fallback values, in the order they are added to the event dict.
    defaults = {
        "impact_score": 5,
        "urgency": "Recent",
        "summary": "No summary provided.",
        "price_impact": "Unknown",
        "source_hint": "Multiple sources",
        "publication_date": current_date,
        "certainty_score": 0.7,
    }
    # Fields for which any falsy value (not only None) triggers the default.
    default_when_empty = ("urgency", "publication_date")

    valid_events = []
    for i, event in enumerate(events):
        try:
            # Normalise to a private dict so the input is left untouched.
            if isinstance(event, dict):
                e_dict = dict(event)
            elif hasattr(event, 'model_dump'):
                e_dict = event.model_dump()
            else:
                e_dict = {}

            # 1. Minimum required fields.
            if not e_dict.get("title") or not e_dict.get("category"):
                log.warning(f"⚠️ Event {i} ignoré : titre ou catégorie manquant")
                continue

            # 1.5. Repair invalid categories.
            category = e_dict.get("category", "")
            if category not in valid_categories:
                log.warning(f"⚠️ Event {i} catégorie invalide '{category}', tentative de correction...")
                category_lower = category.lower()
                mapped = next(
                    (
                        target
                        for keywords, target in category_rules
                        if any(keyword in category_lower for keyword in keywords)
                    ),
                    None,
                )
                if mapped is None:
                    # No keyword matched: fall back to the generic category.
                    log.warning(f"⚠️ Event {i} catégorie '{category}' mappée vers 'Geopolitical'")
                    mapped = "Geopolitical"
                e_dict["category"] = mapped

            # 2. Defaults for missing / None optional fields. A legitimate
            #    0 impact score or an empty summary is preserved.
            for key, fallback in defaults.items():
                value = e_dict.get(key)
                if value is None or (key in default_when_empty and not value):
                    e_dict[key] = fallback

            # An absent or empty id is replaced by a stable fingerprint.
            if not e_dict.get("id"):
                e_dict["id"] = event_fingerprint(e_dict["title"], e_dict["category"])

            valid_events.append(e_dict)
        except Exception as ex:
            log.error(f"❌ Erreur lors de la validation de l'événement {i} : {ex}")

    return valid_events

def configure_dspy():
    """Configure DSPy to use the llama-server model described in CONFIG.

    Returns:
        The ``dspy.LM`` instance that was installed as the default LM.
    """
    model_cfg = CONFIG.model
    timeout_s = model_cfg.dspy_timeout

    lm = dspy.LM(
        model=f"openai/{model_cfg.name}",
        api_base=model_cfg.api_base,
        api_key="dummy",  # placeholder: the original notes llama-server needs no API key
        model_type="chat",
        litellm_params={"timeout": timeout_s},  # timeout taken from the configuration
    )
    dspy.configure(lm=lm, adapter=dspy.JSONAdapter())
    log.info(f"⚙️  DSPy configuré avec timeout: {timeout_s}s")
    return lm

# ─────────────────────────────────────────────
# Configuration
# ─────────────────────────────────────────────

# Modèles Pydantic pour la validation de la configuration
class ModelConfig(BaseModel):
    """Settings describing the LLM model and how to reach it."""

    name: str = Field(
        ...,
        description="Nom du modèle",
    )
    path: str = Field(
        ...,
        description="Chemin vers le fichier du modèle",
    )
    api_base: str = Field(
        ...,
        description="URL de base de l'API",
    )
    num_ctx: int = Field(
        ...,
        gt=0,
        description="Taille du contexte",
    )
    provider: str = Field(
        ...,
        description="Fournisseur du modèle",
    )
    dspy_timeout: int = Field(
        default=600,
        ge=30,
        description="Timeout DSPy en secondes (min: 30s)",
    )
    max_intelligence_chars: int = Field(
        default=20000,
        ge=5000,
        description="Taille max de l'intelligence pour DSPy (car)",
    )

    @field_validator('path')
    @classmethod
    def path_exists(cls, v):
        """Accept ``v`` only when it points to an existing regular file."""
        candidate = Path(v)
        if candidate.is_file():
            return v
        # Distinguish "nothing there" from "there, but not a file".
        if not candidate.exists():
            raise ValueError(f"Modèle introuvable : {v}")
        raise ValueError(f"Le chemin n'est pas un fichier : {v}")

class LlamaServerConfig(BaseModel):
    """Settings for the llama-server process."""
    # Every field has a default. build_llama_server_command() turns these
    # values into the command-line flags noted next to each field.

    # Program to launch (".exe" is appended on Windows by the command builder).
    executable: str = Field(default="llama-server", description="Exécutable du serveur")
    # Passed as -ngl; must be >= -1 (the description says -1 means all layers).
    n_gpu_layers: int = Field(default=-1, ge=-1, description="Nombre de couches GPU (-1 pour toutes)")
    # Passed as -t; must be >= -1 (the description says 0 means automatic).
    n_threads: int = Field(default=0, ge=-1, description="Nombre de threads CPU (0 pour auto)")
    # Passed as -c; strictly positive.
    ctx_size: int = Field(default=8192, gt=0, description="Taille du contexte")
    # Passed as -b; strictly positive.
    batch_size: int = Field(default=512, gt=0, description="Taille du batch")
    # Passed as -ub; strictly positive.
    ubatch_size: int = Field(default=128, gt=0, description="Taille du micro-batch")
    # Passed as -ctk.
    cache_type_k: str = Field(default="f16", description="Type de cache K")
    # Passed as -ctv.
    cache_type_v: str = Field(default="f16", description="Type de cache V")
    # Passed as --host.
    # NOTE(review): 0.0.0.0 listens on every network interface — confirm this
    # exposure is intended, or prefer 127.0.0.1 for a purely local server.
    host: str = Field(default="0.0.0.0", description="Adresse d'écoute")
    # Passed as --port; must be within 1..65535.
    port: int = Field(default=8080, ge=1, le=65535, description="Port d'écoute")

class EmailConfig(BaseModel):
    """Settings for sending alert emails."""
    # All fields have defaults; they are read by send_alert_email().

    # SMTP relay host the alert is submitted to.
    smtp_host: str = Field(default="localhost", description="Hôte SMTP")
    # SMTP relay port; must be within 1..65535.
    smtp_port: int = Field(default=25, ge=1, le=65535, description="Port SMTP")
    # Address used for the From header and as the envelope sender.
    email_from: str = Field(default="oil-monitor@localhost", description="Expéditeur")
    # Single recipient address used for the To header and the envelope recipient.
    email_to: str = Field(default="admin@example.com", description="Destinataire")
    # Text prepended to every alert subject.
    email_subject_prefix: str = Field(default="[OIL-ALERT]", description="Préfixe du sujet")
    # When False (the default), send_alert_email() only records the alert in
    # the email history instead of contacting the SMTP server.
    send_emails: bool = Field(default=False, description="Activer l'envoi d'emails")

class PersistenceConfig(BaseModel):
    """Settings for the files used to persist state."""

    # JSON file holding the log of sent (or simulated) emails; read and
    # written by load_email_history() / save_email_history().
    history_file: str = Field(default="logs/email_history.json", description="Fichier d'historique")
    # JSON file holding the already-seen event identifiers; read and written
    # by load_seen_events() / save_seen_events().
    events_db: str = Field(default="logs/events_seen.json", description="Base des événements vus")
    # Path of the dataset file (its consumer is not visible in this part of the file).
    dataset_file: str = Field(default="data/oil_intelligence_dataset.jsonl", description="Fichier de dataset")

class MonitoringConfig(BaseModel):
    """Settings for the monitoring behaviour."""

    # Alert threshold, constrained to the inclusive range 0..10 (default 6).
    alert_threshold: int = Field(default=6, ge=0, le=10, description="Seuil d'alerte")
    # News sources; defaults to a fresh empty list per instance.
    news_sources: List[str] = Field(default_factory=list, description="Sources d'actualités")
    # Time-zone name kept as a plain string; it is not validated here.
    timezone: str = Field(default="Europe/Paris", description="Fuseau horaire")
    # Number of hours considered "recent"; strictly positive (default 24).
    recent_news_hours: int = Field(default=24, gt=0, description="Heures d'actualités récentes")

class Config(BaseModel):
    """Top-level configuration."""
    # Aggregates the five sections below. None of them has a default here, so
    # each section must be supplied when a Config is built (load_config()
    # builds one from the content of config.json).

    # LLM model settings.
    model: ModelConfig
    # llama-server process settings.
    llama_server: LlamaServerConfig
    # Alert email settings.
    email: EmailConfig
    # Persistence file locations.
    persistence: PersistenceConfig
    # Monitoring settings.
    monitoring: MonitoringConfig

# Charger la configuration depuis config.json
def load_config() -> Config:
    """Load and validate the configuration stored in ``config.json``.

    The file is looked up relative to the current working directory.

    Returns:
        The validated ``Config`` instance.

    Raises:
        FileNotFoundError: If ``config.json`` does not exist.
        ValueError: If the file is not valid JSON, or if reading it or
            building/validating the ``Config`` fails. The original exception
            is attached as ``__cause__`` so the real origin of the failure
            stays visible in tracebacks.
    """
    config_path = Path("config.json")
    if not config_path.exists():
        raise FileNotFoundError(
            "Fichier config.json introuvable. "
            "Veuillez le créer avec la configuration du modèle."
        )
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config_dict = json.load(f)
        return Config(**config_dict)
    except json.JSONDecodeError as e:
        # Explicit chaining marks the JSON error as the direct cause.
        raise ValueError(f"Erreur de parsing JSON dans config.json : {e}") from e
    except Exception as e:
        # Anything else raised while reading or validating is reported as a
        # configuration validation error, with the cause preserved.
        raise ValueError(f"Erreur de validation de la configuration : {e}") from e

# Load and validate the configuration once, when the module is imported.
CONFIG = load_config()

# Legacy flat dictionary kept for backward compatibility: the dumped
# configuration plus three llama-specific shortcut keys.
CONFIG_dict = CONFIG.model_dump()
CONFIG_dict.update(
    llama_model=f"openai/{CONFIG.model.name}",
    llama_api_base=CONFIG.model.api_base,
    llama_num_ctx=CONFIG.model.num_ctx,
)

# ─────────────────────────────────────────────
# Gestion automatique de llama-server
# ─────────────────────────────────────────────

# Handle of the llama-server child process spawned by start_llama_server().
# It stays None until a server is launched by this module and is reset to
# None by stop_llama_server().
_llama_server_process = None

def build_llama_server_command(config: Config) -> list:
    """Assemble the llama-server command line from the configuration.

    Args:
        config: Validated configuration.

    Returns:
        The argument list to hand to ``subprocess.Popen``: the executable
        followed by flag/value pairs.
    """
    srv = config.llama_server

    # On Windows the executable needs its ".exe" extension.
    binary = srv.executable
    needs_exe_suffix = sys.platform == "win32" and not binary.endswith(".exe")
    if needs_exe_suffix:
        binary = f"{binary}.exe"

    # Flag/value pairs in the order they appear on the command line.
    flag_values = (
        ("-m", config.model.path),
        ("--host", srv.host),
        ("--port", str(srv.port)),
        ("-ngl", str(srv.n_gpu_layers)),
        ("-t", str(srv.n_threads)),
        ("-c", str(srv.ctx_size)),
        ("-b", str(srv.batch_size)),
        ("-ub", str(srv.ubatch_size)),
        ("-ctk", srv.cache_type_k),
        ("-ctv", srv.cache_type_v),
    )

    command = [binary]
    for flag, value in flag_values:
        command.extend((flag, value))
    return command

def check_llama_server_running() -> bool:
    """Report whether llama-server answers its health endpoint.

    Returns:
        True when ``GET <api_base>/health`` returns HTTP 200 within two
        seconds; False on any other status or on any error.
    """
    try:
        health_url = f"{CONFIG.model.api_base}/health"
        status = requests.get(health_url, timeout=2).status_code
    except Exception:
        # Best-effort probe: any failure simply means "not running".
        return False
    return status == 200

def _tail_llama_server_log(log_path, max_bytes: int = 4000) -> str:
    """Return at most the last ``max_bytes`` bytes of the server log, decoded leniently."""
    try:
        with open(log_path, "rb") as f:
            f.seek(0, io.SEEK_END)
            size = f.tell()
            f.seek(max(0, size - max_bytes))
            return f.read().decode('utf-8', errors='replace')
    except OSError as e:
        return f"<journal llama-server illisible : {e}>"


def start_llama_server():
    """Start llama-server with the settings from config.json, if needed.

    Nothing is launched when the health endpoint already answers. Otherwise
    the server is spawned in the background and polled once per second for
    up to 60 seconds.

    The child's stdout and stderr are written to ``logs/llama_server.log``
    instead of pipes. A pipe that nobody reads blocks the child once the OS
    buffer is full, and reading a pipe of a still-running process blocks the
    caller until the child exits. With a file, diagnostics can be read at
    any time.

    Returns:
        True when the server is (or becomes) reachable. False when the
        process exits early, when it is not ready in time (it is then
        stopped), or when launching it raises.
    """
    global _llama_server_process

    # Nothing to do when a server already answers.
    if check_llama_server_running():
        log.info("✅ llama-server est déjà en cours d'exécution")
        return True

    cmd = build_llama_server_command(CONFIG)

    log.info("🚀 Démarrage automatique de llama-server...")
    log.info(f"   Modèle: {CONFIG.model.path}")
    log.info(f"   Port: {CONFIG.llama_server.port}")
    log.info(f"   GPU Layers: {CONFIG.llama_server.n_gpu_layers}")
    log.info(f"   Commande: {' '.join(cmd)}")

    server_log_path = Path("logs") / "llama_server.log"

    try:
        server_log_path.parent.mkdir(parents=True, exist_ok=True)

        # The child inherits its own handle on the file, so the parent's
        # handle can be closed as soon as the process has been spawned.
        with open(server_log_path, "wb") as server_log:
            _llama_server_process = subprocess.Popen(
                cmd,
                stdout=server_log,
                stderr=subprocess.STDOUT,
                creationflags=subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0
            )

        # Register the cleanup as soon as a child exists, so it is stopped at
        # interpreter exit even if the start-up below is interrupted.
        atexit.register(stop_llama_server)

        max_wait = 60  # seconds
        for i in range(max_wait):
            # Did the process die during start-up?
            return_code = _llama_server_process.poll()
            if return_code is not None:
                stderr_output = _tail_llama_server_log(server_log_path)
                log.error(f"❌ llama-server s'est terminé avec le code {return_code}")
                log.error(f"❌ Erreur stderr: {stderr_output}")
                _llama_server_process = None
                return False

            if check_llama_server_running():
                log.info(f"✅ llama-server démarré avec succès (PID: {_llama_server_process.pid})")
                return True

            if i % 5 == 0:  # progress message every 5 seconds
                log.info(f"⏳ Attente du serveur... ({i}s)")

            time.sleep(1)

        # Timeout: report the end of the server log, then stop the process
        # so it is not left running without supervision.
        stderr_output = _tail_llama_server_log(server_log_path)
        log.error("❌ Timeout : llama-server n'a pas démarré dans le temps imparti")
        log.error(f"❌ Dernières erreurs stderr: {stderr_output}")
        stop_llama_server()
        return False

    except Exception as e:
        log.error(f"❌ Erreur lors du démarrage de llama-server : {e}")
        import traceback
        log.error(traceback.format_exc())
        # Do not leave a half-started child behind (no-op when none exists).
        stop_llama_server()
        return False

def stop_llama_server():
    """Stop llama-server if it was started by this module.

    start_llama_server() registers this function with ``atexit``, so it runs
    automatically when the Python script ends: the server lives exactly as
    long as the agent.

    The process is first asked to terminate and given 10 seconds. If it is
    still alive it is killed, then waited on again so that it is reaped and
    does not linger as a zombie. Errors are logged, never raised, and the
    module-level handle is always reset to None.
    """
    global _llama_server_process

    if _llama_server_process is None:
        return

    try:
        log.info(f"🛑 Arrêt automatique de llama-server (PID: {_llama_server_process.pid})...")
        _llama_server_process.terminate()

        # Give the process a chance to exit on its own.
        try:
            _llama_server_process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            log.warning("⚠️ Timeout, envoi de SIGKILL...")
            _llama_server_process.kill()
            # Reap the killed process. The wait is bounded so a stuck
            # process cannot hang interpreter shutdown.
            _llama_server_process.wait(timeout=5)

        log.info("✅ llama-server arrêté proprement")
    except Exception as e:
        log.error(f"❌ Erreur lors de l'arrêt de llama-server : {e}")
    finally:
        _llama_server_process = None

# ─────────────────────────────────────────────
# Token Estimation
# ─────────────────────────────────────────────

def estimate_tokens(text: str) -> int:
    """
    Estimate how many tokens a text contains.

    Uses the rule of thumb that 4 characters are roughly 1 token for
    English; the result is rounded down.
    """
    chars_per_token = 4
    token_count, _remainder = divmod(len(text), chars_per_token)
    return token_count

# ─────────────────────────────────────────────
# Logging
# ─────────────────────────────────────────────

# Make stdout/stderr use UTF-8 on Windows so that non-ASCII log messages
# (accents, emoji) are written instead of failing to encode.
if sys.platform == "win32":
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')

# The log directory must exist BEFORE the FileHandler is created:
# logging.FileHandler opens its file immediately and raises
# FileNotFoundError when the parent directory is missing (first run).
Path("logs").mkdir(exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("logs/oil_monitor.log", encoding="utf-8"),
        logging.StreamHandler(sys.stdout),
    ],
)
log = logging.getLogger(__name__)


# ─────────────────────────────────────────────
# Persistance : historique des événements vus
# ─────────────────────────────────────────────
def load_seen_events() -> set:
    """Load the set of already-seen event identifiers.

    Mirrors the error handling of load_email_history(): an unreadable file
    does not crash the agent. The problem is logged, a non-empty corrupt
    file is moved aside with a ``.json.corrupt`` suffix, and an empty set is
    returned.

    Returns:
        The stored identifiers, or an empty set when the file is missing or
        cannot be interpreted.
    """
    p = Path(CONFIG.persistence.events_db)
    if not p.exists():
        return set()

    try:
        with open(p, encoding="utf-8", errors="replace") as f:
            return set(json.load(f))
    except (json.JSONDecodeError, TypeError) as e:
        # JSONDecodeError: invalid JSON. TypeError: the JSON is valid but
        # cannot become a set (not iterable, or unhashable items).
        log.error(f"⚠️ Base des événements vus corrompue ({p}) : {e}. Réinitialisation.")
        if p.stat().st_size > 0:
            p.replace(p.with_suffix(".json.corrupt"))
    return set()


def save_seen_events(seen: set):
    """Write the seen-event identifiers to the events database as a JSON list."""
    payload = json.dumps(list(seen), indent=2)
    with open(CONFIG.persistence.events_db, "w", encoding="utf-8") as out:
        out.write(payload)


def event_fingerprint(title: str, source: str) -> str:
    """Return a stable hash identifying an event that was already processed.

    Both parts are lower-cased and stripped before hashing, so case and
    surrounding whitespace do not change the fingerprint.
    """
    normalized = "|".join(part.lower().strip() for part in (title, source))
    digest = hashlib.md5()
    digest.update(normalized.encode())
    return digest.hexdigest()


# ─────────────────────────────────────────────
# Persistance : historique des emails envoyés
# ─────────────────────────────────────────────
def load_email_history() -> list:
    """Load the email history from its JSON file.

    Returns:
        The decoded history, or an empty list when the file is missing or
        corrupt. A non-empty corrupt file is moved aside with a
        ``.json.corrupt`` suffix.
    """
    history_path = Path(CONFIG.persistence.history_file)
    if not history_path.exists():
        return []

    try:
        with open(history_path, encoding="utf-8", errors="replace") as fh:
            return json.load(fh)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        log.error(f"⚠️ Fichier historique corrompu ({history_path}) : {e}. Création d'un nouveau fichier.")
        # Keep the damaged content for inspection unless the file is empty.
        if history_path.stat().st_size > 0:
            history_path.replace(history_path.with_suffix(".json.corrupt"))
    return []


def save_email_history(history: list):
    """Write the email history to its JSON file, keeping a backup copy.

    Before overwriting, the current file is copied to a sibling with a
    ``.json.bak`` suffix. The backup is best effort: a failure is logged as
    a warning (instead of being silently ignored) and the save still
    proceeds.

    Args:
        history: The complete history to store.
    """
    p = Path(CONFIG.persistence.history_file)

    # Backup before writing.
    if p.exists():
        try:
            p.with_suffix(".json.bak").write_text(
                p.read_text(encoding="utf-8", errors="replace"),
                encoding="utf-8",
            )
        except Exception as e:
            log.warning(f"⚠️ Sauvegarde de l'historique impossible ({p}) : {e}")

    with open(p, "w", encoding="utf-8") as f:
        json.dump(history, f, indent=2, ensure_ascii=False)


def append_email_log(subject: str, body_preview: str, event_id: str):
    """Append one entry to the email history and save it.

    Args:
        subject: Subject of the email.
        body_preview: Body text; only its first 300 characters are stored.
        event_id: Identifier of the event the email refers to.
    """
    history = load_email_history()
    entry = {
        "timestamp": datetime.now().isoformat(),
        "event_id": event_id,
        "subject": subject,
        "preview": body_preview[:300],
    }
    history.append(entry)
    save_email_history(history)
    log.info(f"📧 Historique email mis à jour ({len(history)} entrées)")


# ─────────────────────────────────────────────
# Envoi email via Postfix local
# ─────────────────────────────────────────────
def send_alert_email(subject: str, body: str, event_id: str) -> bool:
    """Send an alert email through the configured SMTP relay.

    The message has a plain-text part and an HTML part. Subject, body and
    event id originate from scraped / LLM-generated content, so they are
    HTML-escaped before being placed in the HTML part. Otherwise a stray
    ``<`` or ``&`` would break the markup, and arbitrary tags could be
    injected into the email.

    When ``CONFIG.email.send_emails`` is False nothing is sent: the alert is
    only recorded in the email history.

    Args:
        subject: Alert subject; the configured prefix is prepended.
        body: Plain-text body of the alert.
        event_id: Identifier of the event, also sent as a custom header.

    Returns:
        True when the email was sent (or simulated), False when sending failed.
    """
    # Function-scope import: `html` is not imported at module level.
    from html import escape

    full_subject = f"{CONFIG.email.email_subject_prefix} {subject}"
    msg = MIMEMultipart("alternative")
    msg["Subject"] = full_subject
    msg["From"] = CONFIG.email.email_from
    msg["To"] = CONFIG.email.email_to
    msg["X-OilMonitor-EventID"] = event_id

    # Plain-text part.
    msg.attach(MIMEText(body, "plain", "utf-8"))

    # HTML part: every interpolated value is escaped first.
    safe_subject = escape(full_subject)
    safe_body = escape(body)
    safe_event_id = escape(event_id)
    html_body = f"""
    <html><body style="font-family: Arial, sans-serif; max-width: 700px; margin: auto;">
      <div style="background:#c0392b; color:white; padding:12px 20px; border-radius:6px 6px 0 0;">
        <h2 style="margin:0;">⚠️ {safe_subject}</h2>
      </div>
      <div style="border:1px solid #ddd; padding:20px; border-radius:0 0 6px 6px;">
        <pre style="white-space:pre-wrap; font-family:Arial; font-size:14px;">{safe_body}</pre>
        <hr/>
        <small style="color:#888;">
          Oil Monitor Agent • {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} •
          Event ID: {safe_event_id}
        </small>
      </div>
    </body></html>
    """
    msg.attach(MIMEText(html_body, "html", "utf-8"))

    # Simulation mode: record the alert without contacting the SMTP server.
    if not CONFIG.email.send_emails:
        log.info(f"📧 Email désactivé (simulation) : {full_subject}")
        append_email_log(full_subject, body, event_id)
        return True

    try:
        with smtplib.SMTP(CONFIG.email.smtp_host, CONFIG.email.smtp_port, timeout=10) as smtp:
            smtp.sendmail(CONFIG.email.email_from, [CONFIG.email.email_to], msg.as_string())
        log.info(f"✅ Email envoyé : {full_subject}")
        append_email_log(full_subject, body, event_id)
        return True
    except Exception as e:
        log.error(f"❌ Échec envoi email : {e}")
        return False


# ─────────────────────────────────────────────
# TOOLS personnalisés
# ─────────────────────────────────────────────

class IranConflictTool(Tool):
    """Tool: Iran conflicts / Strait of Hormuz / IRGC."""
    name = "search_iran_conflict"
    description = (
        "Search Iran/Hormuz conflicts, IRGC actions, Israel escalation, "
        "sanctions disrupting oil supply. Returns structured summary."
    )
    inputs = {
        "days_back": {
            "type": "integer",
            "description": "How many days back to search (default: 1)",
            "default": 1,
            "nullable": True,
        }
    }
    output_type = "string"

    def __init__(self, search_tool: DuckDuckGoSearchTool):
        super().__init__()
        # Search callable used to run each query.
        self._search = search_tool

    def forward(self, days_back: int = 1) -> str:
        """Run the Iran-related searches and return a combined text report.

        Args:
            days_back: Number of days reported as the search window in the
                header. The input is declared nullable, so ``None`` is
                accepted and treated as the default of 1.

        Returns:
            A header followed by the per-query results (each cut to 600
            characters, or an error note when the query failed), or
            "No relevant results found." when no query produced anything.
        """
        from datetime import datetime, timedelta

        # A nullable input may arrive as None; timedelta(days=None) would raise.
        if days_back is None:
            days_back = 1

        now = datetime.now()
        current_date = now.strftime("%Y-%m-%d")
        date_str = (now - timedelta(days=days_back)).strftime("%Y-%m-%d")

        queries = [
            f"Iran military attack oil infrastructure {current_date} today breaking",
            f"Strait of Hormuz blockade tanker {current_date} just in",
            f"Iran IRGC oil tanker seized {current_date} recent",
            f"Iran Israel strike retaliation oil {current_date} breaking news",
            f"Iran US sanctions oil export disruption {current_date}",
        ]
        results = []
        for q in queries:
            try:
                r = self._search(q)
                # Ignore empty or very short answers.
                if r and len(r) > 50:
                    results.append(f"[Query: {q}]\n{r[:600]}")
            except Exception as e:
                # A failing query is reported in the output, not raised.
                results.append(f"[Query: {q}] Error: {e}")

        if not results:
            return "No relevant results found."

        header = "=== IRAN CONFLICT SEARCH ===\n"
        header += f"Current Date: {current_date} | Searching since: {date_str}\n\n"
        return header + "\n\n---\n\n".join(results)


class RefineryDamageTool(Tool):
    """Tool: refinery damage (attacks, accidents, fires)."""
    name = "search_refinery_damage"
    description = (
        "Search refinery damage, fires, explosions, drone attacks worldwide. "
        "Returns structured results affecting global oil supply."
    )
    inputs = {
        "region": {
            "type": "string",
            "description": "Region to focus on: 'global', 'middle_east', 'russia', 'iraq'. Default: 'global'",
            "default": "global",
            "nullable": True,
        }
    }
    output_type = "string"

    def __init__(self, search_tool: DuckDuckGoSearchTool, visit_tool: VisitWebpageTool):
        super().__init__()
        # Search callable used to run each query.
        self._search = search_tool
        # Stored for callers; not used by forward().
        self._visit = visit_tool

    def forward(self, region: str = "global") -> str:
        """Run the refinery-damage searches for a region and return a text report.

        Args:
            region: One of 'global', 'middle_east', 'russia', 'iraq'. The
                value is normalised (None, surrounding spaces, letter case,
                spaces or hyphens instead of underscores). Anything
                unrecognised falls back to 'global'.

        Returns:
            A header naming the region actually searched, followed by the
            per-query results (each cut to 600 characters, or an error note
            when the query failed), or "No refinery damage news found." when
            no query produced anything.
        """
        from datetime import datetime

        current_date = datetime.now().strftime("%Y-%m-%d")

        region_map = {
            "middle_east": [
                f"Saudi Aramco refinery attack {current_date} today breaking",
                f"Iraq oil refinery fire drone {current_date} just in",
            ],
            "russia": [
                f"Russia oil refinery drone attack Saratov {current_date} recent",
                f"Russian refinery fire Ukraine drone {current_date} breaking",
            ],
            "iraq": [
                f"Iraq Basra oil field attack {current_date} today",
                f"Iraq Kurdistan pipeline disruption {current_date} recent",
            ],
            "global": [
                f"oil refinery explosion fire {current_date} today breaking",
                f"refinery drone attack oil production disruption {current_date} just in",
                f"Saudi Arabia Aramco infrastructure attack {current_date} recent",
            ],
        }

        # The input is nullable and free-form: normalise it so that None,
        # "Middle East" or "middle-east" resolve to a known key, and so that
        # the header reports the region that was really searched.
        region_key = str(region or "global").strip().lower().replace(" ", "_").replace("-", "_")
        if region_key not in region_map:
            region_key = "global"

        results = []
        for q in region_map[region_key]:
            try:
                r = self._search(q)
                # Ignore empty or very short answers.
                if r and len(r) > 50:
                    results.append(f"[{q}]\n{r[:600]}")
            except Exception as e:
                # A failing query is reported in the output, not raised.
                results.append(f"[{q}] Error: {e}")

        if not results:
            return "No refinery damage news found."

        header = "=== REFINERY DAMAGE SEARCH ===\n"
        header += f"Region: {region_key} | Current Date: {current_date}\n\n"
        return header + "\n\n---\n\n".join(results)


class OPECSupplyTool(Tool):
    """Tool: OPEC+ decisions, production cuts and quotas."""
    name = "search_opec_supply"
    description = (
        "Search OPEC+ production cuts, quota decisions, emergency meetings, "
        "supply policy changes. Covers Saudi, Russia, UAE unilateral cuts."
    )
    inputs = {
        "focus": {
            "type": "string",
            "description": "Focus area: 'opec_meeting', 'production_cut', 'all'. Default: 'all'",
            "default": "all",
            "nullable": True,
        }
    }
    output_type = "string"

    def __init__(self, search_tool: DuckDuckGoSearchTool):
        """Keep a reference to the search tool every query is sent through.

        Args:
            search_tool: Search tool; ``forward`` calls it with a query string.
        """
        super().__init__()
        self._search = search_tool

    def forward(self, focus: str = "all") -> str:
        """Search for same-day OPEC+ supply news, optionally narrowed by focus.

        Args:
            focus: 'opec_meeting', 'production_cut' or 'all' (matched
                case-insensitively, surrounding whitespace ignored). ``None``
                or an unrecognised value is treated as 'all', which runs
                every query.

        Returns:
            A header naming the focus actually used and the current date,
            followed by one entry per query: either the first 600 characters
            of the search result or an error line if the search raised. If
            no query yields an entry, a fixed "not found" message is
            returned instead.
        """
        from datetime import datetime

        current_date = datetime.now().strftime("%Y-%m-%d")

        # Each query is tagged with the focus areas it belongs to. The list
        # order is the order used for focus='all'.
        tagged_queries = [
            (
                ("opec_meeting", "production_cut"),
                f"OPEC+ production cut decision {current_date} today breaking",
            ),
            (
                ("production_cut",),
                f"Saudi Arabia voluntary oil cut barrel {current_date} just in",
            ),
            (
                ("production_cut",),
                f"Russia oil export reduction barrel {current_date} recent",
            ),
            (
                ("opec_meeting",),
                f"OPEC emergency meeting oil price {current_date} breaking news",
            ),
            (
                ("opec_meeting",),
                f"UAE Kuwait oil output quota {current_date} today",
            ),
        ]

        # The input is declared nullable and comes from an LLM: normalise it
        # and fall back to 'all' instead of silently ignoring the argument.
        known_focuses = {tag for tags, _ in tagged_queries for tag in tags}
        focus_key = focus.strip().lower() if isinstance(focus, str) else "all"
        if focus_key not in known_focuses:
            focus_key = "all"
        queries = [
            query
            for tags, query in tagged_queries
            if focus_key == "all" or focus_key in tags
        ]

        results = []
        for q in queries:
            try:
                r = self._search(q)
                # Very short responses are treated as "nothing useful found".
                if r and len(r) > 50:
                    results.append(f"[{q}]\n{r[:600]}")
            except Exception as e:
                # Best effort: one failing query must not hide the others.
                results.append(f"[{q}] Error: {e}")

        if not results:
            return "No OPEC news found."

        header = "=== OPEC+ SUPPLY SEARCH ===\n"
        header += f"Focus: {focus_key} | Current Date: {current_date}\n\n"
        return header + "\n\n---\n\n".join(results)


class NaturalGasDisruptionTool(Tool):
    """Tool: natural gas disruptions (pipelines, LNG, Russia)."""
    name = "search_gas_disruption"
    description = (
        "Search gas pipeline sabotage, LNG terminal damage, Russia cuts to Europe, "
        "Middle East gas field attacks. Gas spikes correlate with oil rallies."
    )
    inputs = {
        "topic": {
            "type": "string",
            "description": "Topic: 'pipeline', 'lng', 'russia_gas', 'all'. Default: 'all'",
            "default": "all",
            "nullable": True,
        }
    }
    output_type = "string"

    def __init__(self, search_tool: DuckDuckGoSearchTool):
        """Keep a reference to the search tool every query is sent through.

        Args:
            search_tool: Search tool; ``forward`` calls it with a query string.
        """
        super().__init__()
        self._search = search_tool

    def forward(self, topic: str = "all") -> str:
        """Search for same-day gas-disruption news, optionally narrowed by topic.

        Args:
            topic: 'pipeline', 'lng', 'russia_gas' or 'all' (matched
                case-insensitively, surrounding whitespace ignored). ``None``
                or an unrecognised value is treated as 'all', which runs
                every query.

        Returns:
            A header naming the topic actually used and the current date,
            followed by one entry per query: either the first 600 characters
            of the search result or an error line if the search raised. If
            no query yields an entry, a fixed "not found" message is
            returned instead.
        """
        from datetime import datetime

        current_date = datetime.now().strftime("%Y-%m-%d")

        # Each query is tagged with the topic it belongs to. The list order
        # is the order used for topic='all'.
        tagged_queries = [
            (
                "pipeline",
                f"natural gas pipeline explosion sabotage {current_date} today breaking",
            ),
            (
                "lng",
                f"LNG terminal attack fire disruption {current_date} just in",
            ),
            (
                "russia_gas",
                f"Russia gas supply Europe cut {current_date} recent",
            ),
            (
                "lng",
                f"Qatar North Field gas disruption {current_date} today",
            ),
            (
                "pipeline",
                f"Azerbaijan Georgia gas pipeline attack {current_date} breaking news",
            ),
        ]

        # The input is declared nullable and comes from an LLM: normalise it
        # and fall back to 'all' instead of silently ignoring the argument.
        known_topics = {tag for tag, _ in tagged_queries}
        topic_key = topic.strip().lower() if isinstance(topic, str) else "all"
        if topic_key not in known_topics:
            topic_key = "all"
        queries = [
            query
            for tag, query in tagged_queries
            if topic_key == "all" or topic_key == tag
        ]

        results = []
        for q in queries:
            try:
                r = self._search(q)
                # Very short responses are treated as "nothing useful found".
                if r and len(r) > 50:
                    results.append(f"[{q}]\n{r[:600]}")
            except Exception as e:
                # Best effort: one failing query must not hide the others.
                results.append(f"[{q}] Error: {e}")

        if not results:
            return "No gas disruption news found."

        header = "=== NATURAL GAS DISRUPTION SEARCH ===\n"
        header += f"Topic: {topic_key} | Current Date: {current_date}\n\n"
        return header + "\n\n---\n\n".join(results)


class ShippingDisruptionTool(Tool):
    """Tool: maritime disruptions (Houthis, piracy, blockages)."""
    name = "search_shipping_disruption"
    description = (
        "Search maritime disruptions: Houthi Red Sea attacks, Bab-el-Mandeb tensions, "
        "Suez blockage, Iran tanker seizures. Directly impacts Brent prices."
    )
    inputs = {}
    output_type = "string"

    def __init__(self, search_tool: DuckDuckGoSearchTool):
        """Remember the search tool that every query is sent through."""
        super().__init__()
        self._search = search_tool

    def _lookup(self, query):
        """Run one query.

        Returns the formatted snippet (first 600 characters of the result),
        an error line if anything raised, or ``None`` when the result is
        empty or no longer than 50 characters.
        """
        try:
            hit = self._search(query)
            if not hit or len(hit) <= 50:
                return None
            return f"[{query}]\n{hit[:600]}"
        except Exception as exc:
            return f"[{query}] Error: {exc}"

    def forward(self) -> str:
        """Run the fixed set of shipping-disruption queries for today's date.

        Returns:
            A header with the current date followed by the per-query entries
            joined by a horizontal-rule separator, or a fixed "not found"
            message when no query produced an entry.
        """
        from datetime import datetime

        today = format(datetime.now(), "%Y-%m-%d")

        search_terms = (
            f"Houthi attack oil tanker Red Sea {today} today breaking",
            f"Bab-el-Mandeb shipping disruption oil {today} just in",
            f"Suez Canal closure tanker blockage {today} recent",
            f"Iran seize oil tanker Strait Hormuz {today} breaking news",
            f"oil tanker piracy attack {today} today",
        )

        snippets = [
            entry for entry in map(self._lookup, search_terms) if entry is not None
        ]
        if not snippets:
            return "No shipping disruption news found."

        banner = "=== SHIPPING DISRUPTION SEARCH ===\n" + f"Current Date: {today}\n\n"
        return banner + "\n\n---\n\n".join(snippets)


class GeopoliticalEscalationTool(Tool):
    """Tool: broad geopolitical escalations that affect oil."""
    name = "search_geopolitical_escalation"
    description = (
        "Search broad escalations: Russia-Ukraine energy attacks, US-China tensions, "
        "Libya civil war, Venezuela sanctions, Nigeria pipeline attacks."
    )
    inputs = {}
    output_type = "string"

    def __init__(self, search_tool: DuckDuckGoSearchTool):
        """Hold on to the search tool used by ``forward``."""
        super().__init__()
        self._search = search_tool

    def forward(self) -> str:
        """Run the fixed set of geopolitical-escalation queries for today.

        Returns:
            A header with the current date followed by one entry per query
            (first 600 characters of the result, or an error line if the
            search raised), or a fixed "not found" message when no query
            produced an entry.
        """
        from datetime import datetime

        stamp = f"{datetime.now():%Y-%m-%d}"

        terms = [
            f"Russia Ukraine oil energy infrastructure attack {stamp} today breaking",
            f"Libya oil field shutdown civil war {stamp} just in",
            f"Venezuela oil sanction export {stamp} recent",
            f"Nigeria pipeline attack oil production {stamp} breaking news",
            f"US China South China Sea oil tension {stamp} today",
        ]

        collected = []
        for term in terms:
            try:
                answer = self._search(term)
                # Skip empty results and anything of 50 characters or fewer.
                usable = bool(answer) and len(answer) > 50
                if usable:
                    collected.append(f"[{term}]\n{answer[:600]}")
            except Exception as err:
                # A failed query is reported inline; the loop carries on.
                collected.append(f"[{term}] Error: {err}")

        if not collected:
            return "No geopolitical escalation news found."

        body = "\n\n---\n\n".join(collected)
        title = "=== GEOPOLITICAL ESCALATION SEARCH ===\n"
        dateline = f"Current Date: {stamp}\n\n"
        return title + dateline + body


class OilPriceTool(Tool):
    """Tool: current Brent / WTI prices."""
    name = "get_oil_price"
    description = (
        "Fetches the current Brent crude and WTI crude oil prices, "
        "recent price movements, and analyst forecasts."
    )
    inputs = {}
    output_type = "string"

    def __init__(self, search_tool: DuckDuckGoSearchTool):
        """Store the search tool the price queries are issued through."""
        super().__init__()
        self._search = search_tool

    def _quote(self, query):
        """Run one price query.

        Returns the formatted snippet (first 400 characters of the result),
        an error line if anything raised, or ``None`` when the result is
        empty or no longer than 50 characters.
        """
        try:
            found = self._search(query)
            if found and len(found) > 50:
                return f"[{query}]\n{found[:400]}"
            return None
        except Exception as problem:
            return f"[{query}] Error: {problem}"

    def forward(self) -> str:
        """Run the Brent, WTI and forecast queries for today's date.

        Returns:
            A header with the current date followed by the per-query entries
            separated by blank lines, or a fixed "unavailable" message when
            no query produced an entry.
        """
        from datetime import datetime

        day = datetime.now().strftime("%Y-%m-%d")

        price_queries = (
            f"Brent crude oil price today {day}",
            f"WTI crude price today barrel {day}",
            f"oil price forecast analyst {day}",
        )

        entries = []
        for query in price_queries:
            entry = self._quote(query)
            if entry is not None:
                entries.append(entry)

        if entries:
            heading = "=== OIL PRICE DATA ===\n" + f"Current Date: {day}\n\n"
            return heading + "\n\n".join(entries)
        return "Oil price data unavailable."


class RecentNewsTool(Tool):
    """Tool : actualités très récentes sur le pétrole"""
    name = "search_recent_news"
    description = (
        "Search recent oil news from major sources. Filter by date (24h, 48h, 7d). "
        "Prioritize breaking news from Reuters, Bloomberg, AP, BBC, FT, WSJ."
    )
    inputs = {
        "topic": {
            "type": "string",
            "description": "Topic to search: 'iran', 'refinery', 'opec', 'gas', 'shipping', 'geopolitical', 'all'. Default: 'all'",
            "default": "all",
            "nullable": True,
        },
        "timeframe": {
            "type": "string",
            "description": "Time period: '24h', '48h', '7d'. Default: '24h'",
            "default": "24h",
            "nullable": True,
        }
    }
    output_type = "string"

    def __init__(self, search_tool: DuckDuckGoSearchTool):
        super().__init__()
        self._search = search_tool

    def forward(self, topic: str = "all", timeframe: str = "24h") -> str:
        # On utilise des termes relatifs au lieu de la date fixe qui bloque souvent les résultats
        if timeframe == "24h":
            time_query = "today"
            time_label = "Last 24 hours"
        elif timeframe == "48h":
            time_query = "yesterday OR today"
            time_label = "Last 48 hours"
        else:
            time_query = "this week"
            time_label = "Last 7 days"

        # Simplification des sources : on ne met que les 2 plus importantes pour ne pas surcharger la requête
        # L'agent trouvera les autres via une recherche plus large
        priority_sites = "site:reuters.com OR site:bloomberg.com"

        topic_queries = {
            "iran": [
                f"Iran oil attack {time_query} {priority_sites}",
                f"Strait of Hormuz tension {time_query}",
            ],
            "refinery": [
                f"oil refinery explosion fire {time_query}",
                f"refinery drone attack {time_query}",
            ],
            "opec": [