Copy the contents of the html-bobswift macro outside the macro below on the same page in plain text

Get involved · March 4, 2025

On a single Confluence page containing the html-bobswift macro, I would like to extract the content of the macro and copy it onto the same page as plain text. I would like to do this with Python for all Confluence Cloud pages that use the html-bobswift macro.

I tried to ask ChatGPT for it and it uses BeautifulSoup to extract the macro content (ac:structured-macro), find its ac:plain-text-body, and insert it below the macro.

The script seems to work but it doesn't apply to all the pages (I have 160 pages using that macro) and I don't know why.

Here is the code:

import requests
from requests.auth import HTTPBasicAuth
from bs4 import BeautifulSoup
import json
# Confluence Cloud configuration
# Base URL that every REST endpoint in this script is appended to.
CONFLUENCE_BASE_URL = "https://site.atlassian.net/wiki"
# Credentials passed to HTTPBasicAuth on every request (placeholders to fill in).
API_USERNAME = "email address"
API_TOKEN = ""
# Headers sent with the PUT request in update_page, whose body is JSON.
HEADERS = {
   "Content-Type": "application/json"
}
def get_spaces():
   """
   Return the list of all spaces visible to the configured account.

   The space listing is paginated, so batches of up to 100 spaces are
   requested until the response no longer advertises a next page.

   Returns:
       A list of space dicts as returned by the API. On an HTTP error the
       error is printed and the spaces collected so far (possibly an empty
       list) are returned.
   """
   spaces = []
   start = 0
   while True:
       url = f"{CONFLUENCE_BASE_URL}/rest/api/space?limit=100&start={start}"
       response = requests.get(url, auth=HTTPBasicAuth(API_USERNAME, API_TOKEN))
       if response.status_code != 200:
           print("Erreur lors de la récupération des espaces:", response.text)
           return spaces
       data = response.json()
       batch = data.get('results', [])
       spaces.extend(batch)
       # Stop on an empty batch or when the server reports no further page;
       # relying on the "next" link avoids looping forever if `start` were ignored.
       if not batch or 'next' not in data.get('_links', {}):
           return spaces
       start += len(batch)
def get_pages(space_key, start=0, limit=10000):
   """
   Fetch one batch of pages from the given space.

   The request expands 'body.storage' and 'version' so each result carries
   its storage-format body and current version.

   Returns:
       A tuple (results, size, totalSize) read from the JSON response, with
       defaults of [], 0 and 0 for missing keys. On a non-200 response the
       error is printed and ([], 0, 0) is returned.
   """
   query = f"type=page&spaceKey={space_key}&start={start}&limit={limit}&expand=body.storage,version"
   endpoint = f"{CONFLUENCE_BASE_URL}/rest/api/content?{query}"
   reply = requests.get(endpoint, auth=HTTPBasicAuth(API_USERNAME, API_TOKEN))
   if reply.status_code != 200:
       print(f"Erreur lors de la récupération des pages de l'espace {space_key}:", reply.text)
       return [], 0, 0
   listing = reply.json()
   return listing.get('results', []), listing.get('size', 0), listing.get('totalSize', 0)
def update_page(page_id, title, new_body, version_number):
   """
   Replace a page's storage-format body through the REST API.

   The page is sent back with the given title, `new_body` as its storage
   value and a version number one higher than `version_number`. The outcome
   is printed; nothing is returned.
   """
   storage = {"value": new_body, "representation": "storage"}
   document = {
       "id": page_id,
       "type": "page",
       "title": title,
       "body": {"storage": storage},
       "version": {"number": version_number + 1},
   }
   reply = requests.put(
       f"{CONFLUENCE_BASE_URL}/rest/api/content/{page_id}",
       auth=HTTPBasicAuth(API_USERNAME, API_TOKEN),
       headers=HEADERS,
       data=json.dumps(document),
   )
   if reply.status_code != 200:
       print(f"Erreur lors de la mise à jour de la page '{title}' (ID: {page_id}): {reply.status_code} {reply.text}")
       return
   print(f"Page '{title}' (ID: {page_id}) mise à jour avec succès.")
def process_page(page):
   """
   Copy the text of each 'html-bobswift' macro into a paragraph below it.

   The page's storage-format body is parsed, every
   <ac:structured-macro ac:name="html-bobswift"> is located, and the text of
   its <ac:plain-text-body> is inserted as a new <p> right after the macro.

   A macro is skipped when the element that already follows it is a <p>
   holding the same text, so running the script again does not pile up
   duplicate paragraphs.

   Args:
       page: A page dict that contains page['body']['storage']['value'].

   Returns:
       The new body as a string if at least one paragraph was inserted,
       otherwise False.
   """
   body_html = page['body']['storage']['value']
   soup = BeautifulSoup(body_html, 'html.parser')
   updated = False
   for macro in soup.find_all('ac:structured-macro', {"ac:name": "html-bobswift"}):
       plain_text_body = macro.find('ac:plain-text-body')
       if plain_text_body is None:
           continue
       plain_text = plain_text_body.get_text(strip=True)
       if not plain_text:
           continue
       # Idempotence guard: a previous run already placed this text here.
       following = macro.find_next_sibling()
       if (following is not None and following.name == "p"
               and following.get_text(strip=True) == plain_text):
           continue
       # Build a <p> holding the raw text and place it just after the macro.
       new_tag = soup.new_tag("p")
       new_tag.string = plain_text
       macro.insert_after(new_tag)
       updated = True
   return str(soup) if updated else False
def process_all_pages():
   """
   Walk every space and every page, updating pages that hold the macro.

   Pages are fetched batch by batch. The loop advances by the number of
   pages actually received and stops when a batch comes back empty. The
   reported total is used as an early stop only when the server supplies a
   non-zero value: the content listing may omit 'totalSize', in which case
   get_pages reports 0 and a plain `start >= total` check would end the loop
   after the first batch of every space.
   """
   spaces = get_spaces()
   for space in spaces:
       space_key = space['key']
       print(f"Traitement de l'espace: {space_key}")
       start = 0
       while True:
           pages, size, total = get_pages(space_key, start=start)
           if not pages:
               break
           for page in pages:
               new_body = process_page(page)
               if new_body:
                   print(f"Mise à jour de la page: {page['title']} (ID: {page['id']})")
                   update_page(page['id'], page['title'], new_body, page['version']['number'])
           # Fall back to the batch length so the offset always moves forward.
           start += size or len(pages)
           # Trust the total only when it was actually reported.
           if total and start >= total:
               break
# Run the bulk update only when this file is executed as a script.
if __name__ == "__main__":
   process_all_pages()

A pop-up survey could appear while you're here — curious what it's for? Click here to learn more!

Forums

Q&A

Community resources

Support

Top groups

Community resources

Support

Learn

Community resources

Support

Events

Community resources

Support

Copy the contents of the html-bobswift macro outside the macro below on the same page in plain text

1 answer

Suggest an answer

Was this helpful?

Thanks!

DEPLOYMENT TYPE

PRODUCT PLAN

PERMISSIONS LEVEL

TAGS

Atlassian Community Events