from __future__ import annotations import re from datetime import datetime from typing import Any import requests from bs4 import BeautifulSoup from . import db from .models import ObraSocialCatalog, ObraSocialPageSnapshot from .utils import hash_payload, safe_json_dumps GEOREF_BASE = 'https://apis.datos.gob.ar/georef/api' OBRAS_SOCIALES_URL = 'https://www.sssalud.gob.ar/?page=listRnosc&tipo={tipo}' class IntegrationError(Exception): pass def normalize_space(text: str) -> str: return re.sub(r'\s+', ' ', (text or '')).strip() def get_requests_session(): session = requests.Session() session.headers.update({ 'User-Agent': 'BookAppointmentsPro/3.0 (+Flask admin integration)' }) return session def parse_categoria_y_origen(html: str): soup = BeautifulSoup(html, 'html.parser') categoria = '' origen = '' for div in soup.select('div.col-md-8.col-md-offset-2.text-center, div.text-center'): text = normalize_space(div.get_text(' ', strip=True)) if 'Orígen de datos:' in text or 'Origen de datos:' in text: b = div.find('b') if b: categoria = normalize_space(b.get_text(' ', strip=True)) m = re.search(r'Or[ií]gen de datos:\s*(.+)', text, re.I) if m: origen = normalize_space(m.group(1)) break if not categoria: header_candidates = soup.find_all(['h1', 'h2', 'h3', 'b']) for node in header_candidates: txt = normalize_space(node.get_text(' ', strip=True)) if txt and 'RNAS' not in txt and len(txt) > 8: categoria = txt break return { 'categoria_oficial': categoria, 'origen_datos': origen, } def parse_obras_sociales_rows(html: str) -> dict[str, Any]: soup = BeautifulSoup(html, 'html.parser') meta = parse_categoria_y_origen(html) body_text = normalize_space(soup.get_text(' ', strip=True)) status = 'ok' if 'No se reportan datos' in body_text: status = 'no_data' if 'error de conexión' in body_text.lower() or 'error de base' in body_text.lower() or 'error de conexion' in body_text.lower(): status = 'warning' rows = [] table = soup.find('table') if table: trs = table.find_all('tr') else: trs = soup.find_all('tr') for tr in trs: cells = [normalize_space(td.get_text(' ', strip=True)) for td in tr.find_all(['td', 'th'])] if len(cells) < 5: continue header_join = ' '.join(cells[:5]).lower() if 'rnas' in header_join and 'denominación' in header_join: continue row = { 'rnas': cells[0], 'denominacion': cells[1], 'domicilio': cells[2], 'localidad': cells[3], 'telefono': cells[4] if len(cells) > 4 else '', 'linea_gratuita': cells[5] if len(cells) > 5 else '', 'habilitada_opciones': cells[6] if len(cells) > 6 else '', } if row['rnas'] and row['denominacion']: rows.append(row) return { 'categoria_oficial': meta['categoria_oficial'], 'origen_datos': meta['origen_datos'], 'status': status, 'rows': rows, } def fetch_obras_sociales_tipo(tipo: int) -> dict[str, Any]: session = get_requests_session() resp = session.get(OBRAS_SOCIALES_URL.format(tipo=tipo), timeout=30) resp.raise_for_status() parsed = parse_obras_sociales_rows(resp.text) parsed['tipo'] = tipo return parsed def sync_obras_sociales(tipo_desde: int = 1, tipo_hasta: int = 20) -> dict[str, Any]: summary = { 'pages_processed': 0, 'pages_changed': 0, 'rows_new': 0, 'rows_updated': 0, 'pages_with_error': 0, 'messages': [], } now = datetime.utcnow() seen_ids = set() for tipo in range(tipo_desde, tipo_hasta + 1): page = ObraSocialPageSnapshot.query.filter_by(tipo=tipo).first() if not page: page = ObraSocialPageSnapshot(tipo=tipo) db.session.add(page) db.session.flush() try: snapshot = fetch_obras_sociales_tipo(tipo) payload_for_hash = { 'tipo': tipo, 'categoria_oficial': snapshot['categoria_oficial'], 'origen_datos': snapshot['origen_datos'], 'status': snapshot['status'], 'rows': snapshot['rows'], } current_hash = hash_payload(payload_for_hash) changed = page.last_hash != current_hash if changed: summary['pages_changed'] += 1 page.categoria_oficial = snapshot['categoria_oficial'] page.origen_datos = snapshot['origen_datos'] page.status = snapshot['status'] page.row_count = len(snapshot['rows']) page.last_hash = current_hash page.last_error = None page.last_synced_at = now for row in snapshot['rows']: row_hash = hash_payload({ 'tipo': tipo, 'categoria_oficial': snapshot['categoria_oficial'], **row, }) item = ObraSocialCatalog.query.filter_by(rnas=row['rnas']).first() if not item: item = ObraSocialCatalog(rnas=row['rnas']) db.session.add(item) summary['rows_new'] += 1 else: if item.row_hash != row_hash: summary['rows_updated'] += 1 item.tipo = tipo item.categoria_oficial = snapshot['categoria_oficial'] item.denominacion = row['denominacion'] item.domicilio = row['domicilio'] item.localidad = row['localidad'] item.telefono = row['telefono'] item.linea_gratuita = row['linea_gratuita'] item.habilitada_opciones = row['habilitada_opciones'] item.vigente = True item.row_hash = row_hash item.last_seen_at = now item.page_snapshot_id = page.id db.session.flush() seen_ids.add(item.id) summary['pages_processed'] += 1 except Exception as exc: page.status = 'error' page.last_error = str(exc) page.last_synced_at = now summary['pages_with_error'] += 1 summary['messages'].append(f'tipo={tipo}: {exc}') # marcar como no vigentes las no vistas en esta corrida if seen_ids: ObraSocialCatalog.query.filter(~ObraSocialCatalog.id.in_(seen_ids)).update( {'vigente': False}, synchronize_session=False ) db.session.commit() return summary def georef_get(path: str, params: dict[str, Any] | None = None): session = get_requests_session() resp = session.get(f'{GEOREF_BASE}/{path}', params=params or {}, timeout=30) resp.raise_for_status() return resp.json() def get_provinces(): data = georef_get('provincias', {'campos': 'id,nombre', 'max': 100}) return data.get('provincias', []) def get_municipios(provincia_id: str): data = georef_get('municipios', {'provincia': provincia_id, 'campos': 'id,nombre', 'max': 500}) return data.get('municipios', []) def get_localidades(provincia_id: str = '', municipio_id: str = ''): params = {'campos': 'id,nombre', 'max': 500} if provincia_id: params['provincia'] = provincia_id if municipio_id: params['municipio'] = municipio_id data = georef_get('localidades', params) return data.get('localidades', []) def normalize_sisa_item(item: Any) -> dict[str, Any]: if isinstance(item, dict): getter = item.get else: getter = lambda k, default=None: getattr(item, k, default) name_parts = [getter('apellido'), getter('nombre')] visible = normalize_space(' '.join([p for p in name_parts if p])) if not visible: visible = getter('nombreCompleto') or getter('displayName') or getter('nombre_apellido') or '' specialty = getter('especialidad') or getter('specialty') or getter('especialidadNombre') or '' jurisdiction = getter('jurisdiccion') or getter('jurisdiccionNombre') or getter('provincia') or '' return { 'documento': getter('dni') or getter('documento') or '', 'display_name': normalize_space(visible), 'matricula': getter('matricula') or getter('matriculaNacional') or getter('numeroMatricula') or '', 'profession_name': getter('profesion') or getter('profesionNombre') or getter('titulo') or '', 'specialty': specialty, 'jurisdiction_name': jurisdiction, 'state_name': getter('estado') or getter('estadoRegistro') or getter('situacion') or '', 'raw': safe_json_dumps(item), } def _zeep_client(wsdl: str): from zeep import Client from zeep.transports import Transport session = get_requests_session() transport = Transport(session=session, timeout=30) return Client(wsdl=wsdl, transport=transport) def sisa_test_connection(wsdl: str, user: str, password: str, operation: str) -> dict[str, Any]: if not wsdl: raise IntegrationError('Falta configurar la URL WSDL.') if not user or not password: raise IntegrationError('Faltan las credenciales SISA.') client = _zeep_client(wsdl) service = client.service if not hasattr(service, operation): ops = ', '.join(sorted(client.wsdl.bindings[next(iter(client.wsdl.bindings))].all())[:10]) if client.wsdl.bindings else '' raise IntegrationError(f'La operación {operation} no existe en el WSDL. Operaciones detectadas: {ops}') return {'ok': True, 'message': f'WSDL cargado correctamente. Operación disponible: {operation}'} def sisa_search_professionals(wsdl: str, user: str, password: str, operation: str, dni: str = '', query: str = '', matricula: str = '') -> list[dict[str, Any]]: if not wsdl: raise IntegrationError('Falta configurar la URL WSDL.') if not user or not password: raise IntegrationError('Faltan las credenciales SISA.') client = _zeep_client(wsdl) service = client.service if not hasattr(service, operation): raise IntegrationError(f'La operación {operation} no existe en el WSDL configurado.') method = getattr(service, operation) candidates = [ {'usuario': user, 'clave': password, 'dni': dni, 'apellidoNombre': query, 'matricula': matricula}, {'usuario': user, 'password': password, 'dni': dni, 'apellidoNombre': query, 'matricula': matricula}, {'user': user, 'password': password, 'dni': dni, 'apellidoNombre': query, 'matricula': matricula}, {'usuario': user, 'clave': password, 'documento': dni, 'nombreApellido': query, 'matricula': matricula}, {'usuario': user, 'clave': password, 'matricula': matricula, 'query': query, 'dni': dni}, {'dni': dni, 'apellidoNombre': query, 'matricula': matricula, 'usuario': user, 'clave': password}, ] last_error = None response = None for kwargs in candidates: clean_kwargs = {k: v for k, v in kwargs.items() if v not in (None, '')} try: response = method(**clean_kwargs) break except Exception as exc: last_error = exc continue if response is None: raise IntegrationError(f'No se pudo ejecutar la operación SISA: {last_error}') if response is None: return [] if isinstance(response, list): items = response elif isinstance(response, dict): for key in ('profesionales', 'items', 'return', 'resultado', 'results'): if key in response and response[key]: items = response[key] break else: items = [response] else: possible = None for key in ('profesionales', 'items', 'return', 'resultado', 'results'): possible = getattr(response, key, None) if possible: break items = possible if possible is not None else [response] normalized = [normalize_sisa_item(item) for item in items if item] normalized = [item for item in normalized if item['display_name'] or item['documento'] or item['matricula']] return normalized