This Python script is an automation program designed to download...

August 28, 2025 at 06:25 PM

import os
import time
import shutil
import logging
import configparser
import keyboard
from datetime import datetime
from selenium import webdriver
from selenium.common.exceptions import WebDriverException

# Logging setup: INFO and above is written both to a UTF-8 log file and to the console.
logging.basicConfig(
    handlers=(
        logging.FileHandler(filename='robo_extracao.log', encoding='utf-8'),
        logging.StreamHandler(),
    ),
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Load settings from config.ini; every lookup below carries its own fallback value.
config = configparser.ConfigParser()
config.read('config.ini', encoding='utf-8')

# Endpoints: the page opened for the manual login and the report download link.
LOGIN_URL = config.get(
    'URLS',
    'LOGIN_URL',
    fallback="https://msplanoaplp1.scl.corp:8000/energisa/g2e/",
)
DOWNLOAD_URL = config.get(
    'URLS',
    'DOWNLOAD_URL',
    fallback="https://msplanoaplp1.scl.corp:8000/energisa/g2e/abalroados-excel.php",
)

# Filesystem locations: where the browser saves files and where they end up.
DOWNLOAD_FOLDER = config.get(
    'PATHS',
    'DOWNLOAD_FOLDER',
    fallback=r"C:\Users\bso4\Downloads",
)
DESTINO = config.get(
    'PATHS',
    'DESTINO',
    fallback=r"C:\Users\bso4\Downloads\robo_extracao\extracao",
)

# Timing and retry settings (integers; the wait values are used as seconds).
TIMEOUT_DOWNLOAD = config.getint(
    'SETTINGS', 'TIMEOUT_DOWNLOAD', fallback=60
)
MAX_TENTATIVAS = config.getint(
    'SETTINGS', 'MAX_TENTATIVAS', fallback=3
)
ESPERA_ENTRE_TENTATIVAS = config.getint(
    'SETTINGS', 'ESPERA_ENTRE_TENTATIVAS', fallback=10
)

def criar_pasta_se_nao_existir(caminho):
    """Ensure the directory ``caminho`` exists, creating missing parents.

    Args:
        caminho: Path of the directory to create.

    Returns:
        True when ``os.makedirs`` succeeded (or the directory already
        existed); False when it raised. The error is logged, not propagated.
    """
    try:
        os.makedirs(caminho, exist_ok=True)
    except Exception as erro:
        logging.error(f"Erro ao criar pasta {caminho}: {erro}")
        return False
    else:
        logging.info(f"Pasta verificada/criada: {caminho}")
        return True

def aguardar_download_completo(pasta, timeout=TIMEOUT_DOWNLOAD):
    """Wait until ``pasta`` holds no in-progress download files.

    A download counts as in progress while any entry in ``pasta`` has a name
    ending in ``.crdownload`` or ``.tmp``. The folder is polled roughly every
    two seconds.

    Args:
        pasta: Directory to poll.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True once two checks made two seconds apart both find no partial
        files; False if ``timeout`` elapses first.

    Raises:
        OSError: If ``pasta`` cannot be listed.
    """
    sufixos_parciais = ('.crdownload', '.tmp')

    def ha_download_em_andamento():
        # str.endswith accepts a tuple, so one call covers both suffixes.
        return any(nome.endswith(sufixos_parciais) for nome in os.listdir(pasta))

    logging.info("Aguardando conclusão do download...")
    # Use the monotonic clock: wall-clock adjustments (NTP, DST, manual
    # changes) must not shorten or extend the timeout.
    prazo = time.monotonic() + timeout

    # NOTE(review): "no partial file" is also true before the browser has
    # created one, so a check made before the download starts reports
    # success - confirm the caller's initial delay is long enough.
    while time.monotonic() < prazo:
        if not ha_download_em_andamento():
            # Short extra pause, then re-check, to make sure the download
            # really finished rather than being between two partial files.
            time.sleep(2)
            if not ha_download_em_andamento():
                logging.info("Download concluído com sucesso.")
                return True

        time.sleep(2)

    logging.error(f"Timeout após {timeout} segundos aguardando download.")
    return False

def encontrar_arquivo_mais_recente(pasta, prefixo, extensao):
    """Return the most recently modified matching file in ``pasta``.

    Args:
        pasta: Directory to search (not recursive).
        prefixo: Required start of the file name.
        extensao: Required end of the file name (e.g. ``".xls"``).

    Returns:
        Full path of the matching entry with the newest modification time,
        or None when nothing matches or when listing/stat-ing fails (the
        error is logged).
    """
    try:
        candidatos = (
            os.path.join(pasta, nome)
            for nome in os.listdir(pasta)
            if nome.startswith(prefixo) and nome.endswith(extensao)
        )
        # default=None covers the "no match" case without a separate check.
        return max(candidatos, key=os.path.getmtime, default=None)
    except Exception as erro:
        logging.error(f"Erro ao buscar arquivos: {erro}")
        return None

def mover_arquivo(origem, destino_dir):
    """Move ``origem`` into ``destino_dir``, keeping its file name.

    Args:
        origem: Path of the file to move.
        destino_dir: Target directory; created if it does not exist.

    Returns:
        True when the file exists at the destination after the move; False
        when the source is missing, the moved file cannot be found, or any
        step raised (the error is logged, not propagated).
    """
    try:
        # Guard: nothing to do if the source is not there.
        if not os.path.exists(origem):
            logging.error(f"Arquivo de origem não existe: {origem}")
            return False

        os.makedirs(destino_dir, exist_ok=True)
        alvo = os.path.join(destino_dir, os.path.basename(origem))
        shutil.move(origem, alvo)

        # Confirm the file actually landed at the destination.
        if not os.path.exists(alvo):
            logging.error("Falha ao mover o arquivo.")
            return False

        tamanho_bytes = os.path.getsize(alvo)
        logging.info(f"Arquivo movido com sucesso: {alvo} ({tamanho_bytes} bytes)")
        return True
    except Exception as erro:
        logging.error(f"Erro ao mover arquivo: {erro}")
        return False

def executar_extracao(driver):
    """Download the report through the browser and move it to ``DESTINO``.

    Steps: open ``DOWNLOAD_URL`` in a new tab, wait for the download folder
    to show no partial files, pick the newest ``Rel-PostesAbalroados-G2E*.xls``
    file in ``DOWNLOAD_FOLDER`` and move it into ``DESTINO``.

    Args:
        driver: Browser driver object; only its ``execute_script`` method is
            used here.

    Returns:
        True when a file was downloaded, found and moved; False on any
        failure (each failure is logged; exceptions are not propagated).
    """
    try:
        logging.info("Iniciando processo de extração...")

        # Open the download link in a new tab. The URL is passed as a script
        # argument rather than interpolated into the JavaScript source, so a
        # quote or backslash in the configured URL cannot break or inject code.
        driver.execute_script("window.open(arguments[0], '_blank');", DOWNLOAD_URL)
        time.sleep(3)  # Give the tab time to open and the download to start

        if not aguardar_download_completo(DOWNLOAD_FOLDER):
            logging.error("Falha no download do arquivo.")
            return False

        # NOTE(review): this picks the newest matching file, which may be a
        # leftover from an earlier run if no new download happened - confirm
        # the download folder is kept free of old reports.
        arquivo_baixado = encontrar_arquivo_mais_recente(
            DOWNLOAD_FOLDER,
            "Rel-PostesAbalroados-G2E",
            ".xls",
        )
        if not arquivo_baixado:
            logging.error("Nenhum arquivo correspondente encontrado na pasta de downloads.")
            return False

        logging.info(f"Arquivo baixado encontrado: {os.path.basename(arquivo_baixado)}")

        if not mover_arquivo(arquivo_baixado, DESTINO):
            logging.error("Falha ao mover o arquivo para o destino.")
            return False

        logging.info("Processo de extração concluído com sucesso!")
        return True

    except Exception as e:
        logging.error(f"Erro durante a extração: {e}")
        return False

def _iniciar_navegador():
    """Start Chrome configured to save downloads into ``DOWNLOAD_FOLDER`` without prompting."""
    options = webdriver.ChromeOptions()
    prefs = {
        "download.default_directory": DOWNLOAD_FOLDER,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True,
    }
    options.add_experimental_option("prefs", prefs)
    options.add_argument("--start-maximized")
    return webdriver.Chrome(options=options)


def _encerrar_navegador(driver):
    """Quit ``driver`` if there is one, logging (never raising) cleanup errors."""
    if driver is None:
        return
    try:
        driver.quit()
    except Exception as e:
        # A failure while closing must not mask the outcome of the attempt.
        logging.warning(f"Erro ao encerrar o navegador: {e}")


def _aguardar_comando_usuario():
    """Block until the user presses F5 or ESC; return True for F5, False for ESC."""
    while True:
        if keyboard.is_pressed("F5"):
            return True
        if keyboard.is_pressed("esc"):
            return False
        time.sleep(0.1)


def main():
    """Run the extraction robot.

    Each attempt starts a browser on ``LOGIN_URL``, waits for the user to log
    in manually and press F5 (ESC cancels), then runs ``executar_extracao``.
    A failed extraction or a ``WebDriverException`` triggers a new attempt
    after ``ESPERA_ENTRE_TENTATIVAS`` seconds, up to ``MAX_TENTATIVAS``
    attempts; any other exception ends the run immediately. The browser is
    always closed when an attempt ends, including on Ctrl+C.
    """
    logging.info("=" * 50)
    logging.info("INICIANDO ROBÔ DE EXTRAÇÃO DE DADOS")
    logging.info("=" * 50)

    # The destination folder is required; give up early if it cannot be created.
    if not criar_pasta_se_nao_existir(DESTINO):
        logging.error("Não foi possível criar a pasta de destino. Abortando.")
        return

    for tentativa in range(1, MAX_TENTATIVAS + 1):
        # Reset per attempt so cleanup never touches a browser from a
        # previous, already-closed attempt.
        driver = None
        try:
            logging.info(f"Tentativa {tentativa} de {MAX_TENTATIVAS}")

            driver = _iniciar_navegador()
            driver.get(LOGIN_URL)

            logging.info("Navegador iniciado com sucesso.")
            logging.info("Por favor, faça login manualmente no sistema.")
            logging.info("Navegue até a página de download desejada.")
            logging.info("Quando estiver pronto, pressione F5 para iniciar a extração.")
            logging.info("Pressione ESC a qualquer momento para cancelar.")

            if not _aguardar_comando_usuario():
                logging.info("Operação cancelada pelo usuário.")
                return

            logging.info("Tecla F5 detectada! Iniciando extração...")
            if executar_extracao(driver):
                # Close the browser before reporting success.
                _encerrar_navegador(driver)
                driver = None
                logging.info("Robô finalizado com sucesso.")
                return
            # Extraction failed: fall through to the retry wait below.

        except WebDriverException as e:
            # Browser-level failures are retried.
            logging.error(f"Erro no WebDriver: {e}")

        except Exception as e:
            # Anything else is treated as fatal: stop retrying.
            logging.error(f"Erro inesperado: {e}")
            break

        finally:
            # Runs on every exit path (return, break, retry, Ctrl+C).
            _encerrar_navegador(driver)

        if tentativa < MAX_TENTATIVAS:
            logging.info(f"Aguardando {ESPERA_ENTRE_TENTATIVAS} segundos antes da próxima tentativa...")
            time.sleep(ESPERA_ENTRE_TENTATIVAS)

    logging.error("Robô finalizado com falha após todas as tentativas.")

# Run the robot only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

This Python script is an automation program designed to download a file from a specified website, manage the downloaded file, and save it to a designated directory. Let’s break down its core functionality and purpose:

Overview of What the Code Does:

Configuration Loading:
- Reads settings (e.g., file paths, URLs, timeouts, etc.) from a config.ini file.
- Sets up directories for downloads (DOWNLOAD_FOLDER) and the destination folder (DESTINO).
- Handles timeout settings and retry attempts for the automation.
Logging Setup:
- Logs all actions and errors to a log file (e.g., robo_extracao.log) and also prints them to the console.
Folder Management:
- Checks whether the required folders exist and creates them if necessary.
Web Browser Automation:
- Uses Selenium WebDriver to automate browser actions in a Google Chrome browser.
- Allows the user to manually log in to a specified website (LOGIN_URL).
- Automates the process of downloading a data file (DOWNLOAD_URL).
File Management after Download:
- Waits for the download to complete by monitoring the download folder for partial download files (such as .crdownload or .tmp).
- Locates the most recently downloaded file matching a specific filename prefix and file extension.
- Moves the downloaded file to a designated destination folder and confirms its transfer.
Error Handling and Retry:
- Attempts the file download process up to a maximum number of retries (MAX_TENTATIVAS) in case of errors or failures.
- Implements delays between retry attempts to allow the user or system to recover.
User Interaction:
- Waits for the user to manually log in to the system using the browser.
- Reacts when the user presses the F5 key by starting the download and extraction process.
- Allows the user to cancel the script by pressing the ESC key.
Timeout Handling:
- Terminates the download and extraction process if the maximum timeout (TIMEOUT_DOWNLOAD) is reached or if all retry attempts have been exhausted.

Step-by-Step Details:

Initial Setup:

Imports required libraries (os, time, shutil, logging, Selenium, etc.).
Reads settings from the config.ini file into variables like DOWNLOAD_FOLDER, DESTINO, and DOWNLOAD_URL.

Main Flow in `main()` Function:

Folder Verification: Ensures the destination folder exists; if it cannot be created, the script exits early.
Web Browser Initialization: Launches a Chrome browser with specific download preferences to automate downloading files.
Prompts the user to log in manually and navigate to the relevant page.
User-driven Actions: Listens for user input (F5 to proceed, ESC to cancel) to control the process.

File Download and Extraction (`executar_extracao`):

Automates downloading a file from the specified DOWNLOAD_URL in a new browser tab.
Waits for the download to complete by checking that no partial-download files (.crdownload or .tmp) remain in the download folder.
Searches for the most recently downloaded file matching specific criteria (e.g., filename prefix "Rel-PostesAbalroados-G2E" and extension .xls).
Moves the downloaded file to the destination directory, preserving the filename.

Retry Logic:

Repeats the entire process (up to MAX_TENTATIVAS times) if downloading or extraction fails; each retry closes and restarts the browser, so the user must log in and press F5 again.
Introduces delay (ESPERA_ENTRE_TENTATIVAS) between retries to allow recovery.

Error Handling:

Logs all errors to both the console and robo_extracao.log.
Safely shuts down the browser in case of exceptions.
Terminates gracefully if all retries fail.

Purpose of the Code:

This script is likely a data scraper or file management tool designed to:

Automate downloading of specific files (e.g., reports) from a web-based system.
Handle manual intervention (user logins) within an automated process.
Move extracted data files to appropriate directories for further processing or archival.
Provide robust logging for debugging and auditing.

Real-World Use Case:

The script appears to be tailored for corporate or team environments where reports must be routinely downloaded and processed from a company intranet or external website. It streamlines what would otherwise be a manual and repetitive task.

Generate your own explanations

Download our vscode extension

Read other generated explanations

Built by @thebuilderjr