How to properly update a page with HTML code in Python: incorrect Confluence API documentation?

Stan Ry
Rising Star
Rising Star
Rising Stars are recognized for providing high-quality answers to other users. Rising Stars receive a certificate of achievement and are on the path to becoming Community Leaders.
November 27, 2019

Hello there,

I found a wonderful Python script here. The script is supposed to post a given HTML string to a Confluence page.

However, there are the following issues:

  1. If you don't escape '<' and '>' to &lt; and &gt;, Confluence returns HTTP error 400 when using the script.
  2. If you convert the HTML to HTML-safe sequences, the script works fine. But the resulting page does not render the HTML code: the published HTML is not interpreted and is displayed as-is. You see the HTML code for a hyperlink but not the hyperlink itself.
  3. The script uses its own JSON format to pass HTML payload. However, if you use the format suggested by official Atlassian documentation nothing works as if there were some issues with documentation.

The code I am using is as follows.

import requests
import csv
import json
import getpass
import html


# Silence urllib3's warnings. This does not fix any SSL problem; it only hides
# the InsecureRequestWarning that would otherwise accompany each request made
# with certificate verification switched off (see s.verify below).
requests.packages.urllib3.disable_warnings()

# HTTP basic-auth credentials. The password is prompted for interactively so
# that it is never stored in the script.
auth = ('StanRy', getpass.getpass())

# A single shared session: every helper below sends its requests through it.
s = requests.Session()
s.auth = auth
# NOTE(review): verify=False turns off TLS certificate validation for all
# requests on this session. Prefer pointing `verify` at the CA bundle of the
# Confluence host if one is available.
s.verify = False
# Extend the session's default headers instead of replacing them, so the
# defaults requests sets up (User-Agent, Accept, ...) are still sent.
s.headers.update({"Content-Type": "application/json"})

# REST endpoint for content operations and the URL prefix used to print a
# link to the page after it has been written.
BASE_URL = 'https://example.net/confluence_instance/rest/api/content'
VIEW_URL = 'https://example.net/confluence_instance/pages/viewpage.action?pageId='

def get_page_ancestors(pageid):
    """Return the ``ancestors`` entry of a page.

    The page is requested with ``expand=ancestors`` through the shared
    session, and the ``ancestors`` value of the decoded JSON reply is
    returned. ``raise_for_status()`` is called on the response first, so a
    4xx/5xx reply raises instead of returning.
    """
    endpoint = '/'.join((BASE_URL, '{0}?expand=ancestors'.format(pageid)))

    response = s.get(endpoint, auth=auth)
    response.raise_for_status()

    page = response.json()
    return page['ancestors']



def get_page_info(pageid):
    """Return the decoded JSON description of a page.

    Issues ``GET <BASE_URL>/<pageid>`` through the shared session and hands
    back the parsed JSON body. ``raise_for_status()`` is called on the
    response first, so a 4xx/5xx reply raises instead of returning.
    """
    endpoint = '/'.join((BASE_URL, '{0}'.format(pageid)))

    response = s.get(endpoint, auth=auth)
    response.raise_for_status()

    return response.json()



def write_data(html, pageid, title=None):
    """Replace the body of an existing Confluence page.

    Args:
        html: New page body. It is converted with ``str()`` and sent as the
            ``storage`` representation. (The parameter name shadows the
            imported ``html`` module inside this function; it is kept for
            backward compatibility with keyword callers.)
        pageid: Id of the page to update.
        title: Optional new title. When ``None`` the page keeps the title
            returned by ``get_page_info``.

    Raises:
        requests.HTTPError: propagated from ``raise_for_status()`` when the
            server rejects any of the requests.

    On success, prints the written title/version and the page's view URL.
    """
    info = get_page_info(pageid)

    # The update must carry the next version number.
    ver = int(info['version']['number']) + 1

    ancestors = get_page_ancestors(pageid)

    if title is not None:
        info['title'] = title

    data = {
        'id': str(pageid),
        'type': 'page',
        'title': info['title'],
        'version': {'number': ver},
        'body': {
            'storage': {
                'representation': 'storage',
                'value': str(html),
            },
        },
    }

    # A page without ancestors used to crash on ancestors[-1]; in that case
    # the 'ancestors' entry is simply left out of the payload.
    # NOTE(review): presumably omitting it leaves the parent unchanged - confirm.
    if ancestors:
        # Work on a copy of the direct parent and strip the response-only
        # keys; pop() tolerates replies that lack one of them.
        anc = dict(ancestors[-1])
        for key in ('_links', '_expandable', 'extensions'):
            anc.pop(key, None)
        data['ancestors'] = [anc]

    url = '{base}/{pageid}'.format(base=BASE_URL, pageid=pageid)

    r = s.put(
        url,
        data=json.dumps(data),
        headers={'Content-Type': 'application/json'},
    )

    r.raise_for_status()

    print("Wrote '%s' version %d" % (info['title'], ver))
    print("URL: %s%s" % (VIEW_URL, pageid))

# Lift pandas' column-width limit while rendering so that long cell values
# are not truncated in the generated table. None means "no limit"; negative
# integers for this option are deprecated since pandas 1.0.
with pd.option_context('display.max_colwidth', None):
    # escape=True converts <, > and & inside cell values to HTML-safe sequences.
    htmlpayload = storagedataframe.to_html(index=False, justify="justify", escape=True)

write_data(htmlpayload, '1234567890')

This code works fine, but only if code in htmlpayload has been made HTML-safe. For example, if you don't escape HTML:

# Same rendering as before but WITHOUT escaping: escape=False leaves <, > and &
# inside cell values untouched, which is the case that triggers the error below.
# None means "no width limit"; negative integers for this option are
# deprecated since pandas 1.0.
with pd.option_context('display.max_colwidth', None):
    htmlpayload = storagedataframe.to_html(index=False, justify="justify", escape=False)

then running the code returns

HTTPError: 400 Client Error:  for url: https://example.net/confluence_instance/rest/api/content/1234567890

I referred to official Atlassian documentation and thought I'd follow its recommendations on forming JSON payload, which are as follows:

PUT /rest/api/content/456

{
    "version": {
        "number": 2
    },
    "ancestors": [
        {
            "id": 789
        }
    ],
    "type": "page",
    "body": {
        "storage": {
            "value": "<p>New page data.</p>",
            "representation": "storage"
        }
    }
}

So I wrote a slightly modified version of write_data(), which follows the recommended format:

def write_data1(html, pageid, title=None):
    """Update a page using the payload layout from the Atlassian example.

    Args:
        html: New page body. It is converted with ``str()`` and sent as the
            ``storage`` representation. (The parameter name shadows the
            imported ``html`` module inside this function; it is kept for
            backward compatibility with keyword callers.)
        pageid: Id of the page to update.
        title: Optional new title. When ``None`` the page keeps the title
            returned by ``get_page_info``.

    Raises:
        requests.HTTPError: propagated from ``raise_for_status()`` when the
            server rejects any of the requests.

    On success, prints the written title/version and the page's view URL.
    """
    info = get_page_info(pageid)

    # The update must carry the next version number.
    ver = int(info['version']['number']) + 1

    ancestors = get_page_ancestors(pageid)

    if title is not None:
        info['title'] = title

    data = {
        'version': {'number': ver},
        # The title is sent explicitly; previously a title passed by the
        # caller was printed as written but never reached the server.
        'title': info['title'],
        'type': 'page',
        'body': {
            'storage': {
                'value': str(html),
                'representation': 'storage',
            },
        },
    }

    # The documented format expects {"id": <parent id>}. The earlier code
    # sent a list holding the whole ancestor dict as the id.
    # A page without ancestors gets no 'ancestors' entry at all.
    # NOTE(review): presumably omitting it leaves the parent unchanged - confirm.
    if ancestors:
        data['ancestors'] = [{'id': ancestors[-1]['id']}]

    url = '{base}/{pageid}'.format(base=BASE_URL, pageid=pageid)

    r = s.put(
        url,
        data=json.dumps(data),
        # charset is a parameter of Content-Type, not a header of its own.
        headers={'Content-Type': 'application/json; charset=utf-8'},
    )

    r.raise_for_status()

    print("Wrote '%s' version %d" % (info['title'], ver))
    print("URL: %s%s" % (VIEW_URL, pageid))

Unfortunately, this code ALWAYS returns HTTP error 400, no matter whether I escape the payload or not.

So I went further and used another JSON format for payload, found here.

This led me to yet another version of the update function:

def write_data2(html, pageid, title=None):
    """Update a page using a payload that identifies it by id and space key.

    Args:
        html: New page body. It is converted with ``str()`` and sent as the
            ``storage`` representation. (The parameter name shadows the
            imported ``html`` module inside this function; it is kept for
            backward compatibility with keyword callers.)
        pageid: Id of the page to update.
        title: Optional new title. When ``None`` the page keeps the title
            returned by ``get_page_info``.

    Raises:
        requests.HTTPError: propagated from ``raise_for_status()`` when the
            server rejects a request.

    On success, prints the written title/version and the page's view URL.
    """
    info = get_page_info(pageid)

    # The update must carry the next version number.
    ver = int(info['version']['number']) + 1
    spacekey = info['space']['key']

    # This payload has no 'ancestors' entry, so the ancestor lookup that used
    # to happen here was dead code: it cost an extra GET and crashed on pages
    # without ancestors. It has been removed.

    if title is not None:
        info['title'] = title

    data = {
        "id": pageid,
        "type": "page",
        "title": info['title'],
        "space": {"key": spacekey},
        "body": {
            "storage": {
                "value": str(html),
                "representation": "storage",
            },
        },
        "version": {"number": ver},
    }

    url = '{base}/{pageid}'.format(base=BASE_URL, pageid=pageid)

    r = s.put(
        url,
        data=json.dumps(data),
        # charset is a parameter of Content-Type, not a header of its own.
        headers={'Content-Type': 'application/json; charset=utf-8'},
    )

    r.raise_for_status()

    print("Wrote '%s' version %d" % (info['title'], ver))
    print("URL: %s%s" % (VIEW_URL, pageid))

This version works exactly like the first one, found here. It does publish HTML code but only if I escape the HTML data. Otherwise, it returns error 400.

I'd appreciate someone helping me here, as the following questions arise:

1. How do I change the code so that HTML is rendered properly on the target Confluence page, i.e. so that hyperlinks are displayed as hyperlinks and not as raw markup?

2. How do I make the provided example on Atlassian API work for me? Is there anything I've missed with implementing the example in my code?

Thank you in advance.

3 answers

1 accepted

2 votes
Answer accepted
Stan Ry
Rising Star
Rising Star
Rising Stars are recognized for providing high-quality answers to other users. Rising Stars receive a certificate of achievement and are on the path to becoming Community Leaders.
December 5, 2019

Figured it out myself. For those interested, the answer seems to be connected with how Confluence handles special characters on the server side when you post content to a page. Even though Confluence may return special characters, it won't let you post them back until you encode them.
In my case the issue was with ampersand ('&') character found in page URLs returned by Confluence. If you use these URLs on a page, you won't be able to post the page with them.

Suppose your page content is contained in the original_content variable, which is a string with HTML content that contains URLs with concatenated parameters joined by the ampersand symbol. In order to post this content to a Confluence page, you have to convert the ampersand character to its '&amp;' HTML code. You can do it like this:

import re

# Escape only bare ampersands. An "&" that already starts an entity such as
# "&amp;", "&lt;", "&#38;" or "&#x26;" is left alone, so content that was
# escaped once is not double-escaped into "&amp;amp;".
payload_to_post = re.sub(
    r"&(?!(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#[xX][0-9A-Fa-f]+);)",
    "&amp;",
    original_content,
)
1 vote
Stan Ry
Rising Star
Rising Star
Rising Stars are recognized for providing high-quality answers to other users. Rising Stars receive a certificate of achievement and are on the path to becoming Community Leaders.
November 28, 2019

I am going to bump the topic. Would appreciate any help. Thank you.

yatindra kumar janghel October 13, 2021

Hi @Stan Ry, by any chance did you find a solution to the issue you were facing? I am facing this issue with operators like =, ===, and '.

0 votes
Stan Ry
Rising Star
Rising Star
Rising Stars are recognized for providing high-quality answers to other users. Rising Stars receive a certificate of achievement and are on the path to becoming Community Leaders.
December 5, 2019

By the way, here's yet another option to post data to a page:

def write_data2(htmlobject, pageid, title=None):
    """Update a page using a payload that identifies it by id and space key.

    Args:
        htmlobject: New page body. It is converted with ``str()`` and sent
            as the ``storage`` representation.
        pageid: Id of the page to update.
        title: Optional new title. When ``None`` the page keeps the title
            returned by ``get_page_info``.

    Raises:
        requests.HTTPError: propagated from ``raise_for_status()`` when the
            server rejects a request.

    On success, prints the written title/version and the page's view URL.
    """
    info = get_page_info(pageid)

    # The update must carry the next version number.
    ver = int(info['version']['number']) + 1
    spacekey = info['space']['key']

    # This payload has no 'ancestors' entry, so the ancestor lookup that used
    # to happen here was dead code: it cost an extra GET and crashed on pages
    # without ancestors. It has been removed.

    if title is not None:
        info['title'] = title

    data = {
        "id": pageid,
        "type": "page",
        "title": info['title'],
        "space": {"key": spacekey},
        "body": {
            "storage": {
                "value": str(htmlobject),
                "representation": "storage",
            },
        },
        "version": {"number": ver},
    }

    url = '{base}/{pageid}'.format(base=BASE_URL, pageid=pageid)

    r = s.put(
        url,
        data=json.dumps(data),
        # charset is a parameter of Content-Type, not a header of its own.
        headers={'Content-Type': 'application/json; charset=utf-8'},
    )

    r.raise_for_status()

    print("Wrote '%s' version %d" % (info['title'], ver))
    print("URL: %s%s" % (VIEW_URL, pageid))

Suggest an answer

Log in or Sign up to answer
TAGS
AUG Leaders

Atlassian Community Events