On a single Confluence page containing the html-bobswift macro, I would like to be able to extract the content of this html-bobswift macro and copy it to the same page in text format. I would like to be able to do this for all Confluences Cloud pages using the html-bobswift macro with python
I tried to ask ChatGPT for it and it uses BeautifulSoup to extract the macro content (ac:structured-macro), find its ac:plain-text-body, and insert it below the macro.
The script seems to work but it doesn't apply to all the pages (I have 160 pages using that macro) and I don't know why.
Here is the code:
import requests
from requests.auth import HTTPBasicAuth
from bs4 import BeautifulSoup
import json
# Configuration de Confluence Cloud
CONFLUENCE_BASE_URL = "https://site.atlassian.net/wiki"
API_USERNAME = "email address"
API_TOKEN = ""
HEADERS = {
"Content-Type": "application/json"
}
def get_spaces():
"""
Récupère la liste de tous les espaces.
"""
url = f"{CONFLUENCE_BASE_URL}/rest/api/space?limit=100"
response = requests.get(url, auth=HTTPBasicAuth(API_USERNAME, API_TOKEN))
if response.status_code == 200:
return response.json().get('results', [])
else:
print("Erreur lors de la récupération des espaces:", response.text)
return []
def get_pages(space_key, start=0, limit=10000):
"""
Récupère les pages d'un espace donné.
L'endpoint inclut l'extension de la partie 'body.storage' et la version.
"""
url = (f"{CONFLUENCE_BASE_URL}/rest/api/content"
f"?type=page&spaceKey={space_key}&start={start}&limit={limit}&expand=body.storage,version")
response = requests.get(url, auth=HTTPBasicAuth(API_USERNAME, API_TOKEN))
if response.status_code == 200:
data = response.json()
return data.get('results', []), data.get('size', 0), data.get('totalSize', 0)
else:
print(f"Erreur lors de la récupération des pages de l'espace {space_key}:", response.text)
return [], 0, 0
def update_page(page_id, title, new_body, version_number):
"""
Met à jour le contenu d'une page en utilisant l'API REST.
"""
url = f"{CONFLUENCE_BASE_URL}/rest/api/content/{page_id}"
payload = {
"id": page_id,
"type": "page",
"title": title,
"body": {
"storage": {
"value": new_body,
"representation": "storage"
}
},
"version": {
"number": version_number + 1
}
}
response = requests.put(url, auth=HTTPBasicAuth(API_USERNAME, API_TOKEN), headers=HEADERS, data=json.dumps(payload))
if response.status_code == 200:
print(f"Page '{title}' (ID: {page_id}) mise à jour avec succès.")
else:
print(f"Erreur lors de la mise à jour de la page '{title}' (ID: {page_id}): {response.status_code} {response.text}")
def process_page(page):
"""
Cherche la macro 'html-bobswift' dans le contenu d'une page,
extrait le contenu du 'ac:plain-text-body' et l'insère en dessous.
Renvoie le nouveau contenu HTML si la page a été modifiée, sinon False.
"""
page_id = page['id']
title = page['title']
version_number = page['version']['number']
body_html = page['body']['storage']['value']
soup = BeautifulSoup(body_html, 'html.parser')
macros = soup.find_all('ac:structured-macro', {"ac:name": "html-bobswift"})
if not macros:
return False # Pas de macro trouvée, pas de modification
updated = False
for macro in macros:
plain_text_body = macro.find('ac:plain-text-body')
if plain_text_body:
plain_text = plain_text_body.get_text(strip=True)
if plain_text:
# Crée un nouveau tag <p> contenant le texte brut
new_tag = soup.new_tag("p")
new_tag.string = plain_text
# Insère le nouveau paragraphe juste après la macro
macro.insert_after(new_tag)
updated = True
if updated:
return str(soup)
else:
return False
def process_all_pages():
"""
Parcourt tous les espaces et toutes les pages,
et met à jour celles qui contiennent la macro html-bobswift.
"""
spaces = get_spaces()
for space in spaces:
space_key = space['key']
print(f"Traitement de l'espace: {space_key}")
start = 0
while True:
pages, size, total = get_pages(space_key, start=start)
if not pages:
break
for page in pages:
new_body = process_page(page)
if new_body:
print(f"Mise à jour de la page: {page['title']} (ID: {page['id']})")
update_page(page['id'], page['title'], new_body, page['version']['number'])
start += size
if start >= total:
break
if __name__ == "__main__":
process_all_pages()