Create
cancel
Showing results for 
Search instead for 
Did you mean: 
Sign up Log in

It's not the same without you

Join the community to find out what other Atlassian users are discussing, debating and creating.

Atlassian Community Hero Image Collage

Python script fails to write to confluence with a lot of data

I wrote a python script that seemed to work pretty well. It lists EC2 instances in AWS and then writes them to our Confluence wiki.

If it processes one environment with 10 servers it works and writes to Confluence. If it works against an account with 100 or more servers it fails to write to Confluence with this stack trace:

Traceback (most recent call last):
File ".\aws_ec2_list_instances_orig.py", line 550, in <module>
main()
File ".\aws_ec2_list_instances_orig.py", line 543, in main
write_data_to_confluence(auth, html, pageid, title)
File ".\aws_ec2_list_instances_orig.py", line 391, in write_data_to_confluence
r.raise_for_status()
File "C:\Users\tdunphy\AppData\Roaming\Python\Python37\site-packages\requests\models.py", line 940, in raise_for_status
requests.exceptions.HTTPError: 400 Client Error: for url: https://wiki.us.cworld.company.com/rest/api/content/138317098

I've also raised a verbose error here:

Traceback (most recent call last):
File ".\aws_ec2_list_instances_orig.py", line 538, in <module>
main()
File ".\aws_ec2_list_instances_orig.py", line 531, in main
write_data_to_confluence(auth, html, pageid, title)
File ".\aws_ec2_list_instances_orig.py", line 380, in write_data_to_confluence
raise RuntimeError(r.content)
RuntimeError: b'{"statusCode":400,"data":{"authorized":false,"valid":true,"allowedInReadOnlyMode":true,"errors":[],"successful":false},"message":"Error parsing xhtml: Unexpected character \'<\' (code 60); expected a semi-colon after the reference for entity \'C\'\\n at [row,col {unknown-source}]: [1,46579]","reason":"Bad Request"}'

Please note I AM NOT ALLOWED TO POST THE COMPANY DOMAIN IN MY POSTS. I will substitute 'company.com' where my real company domain would be.

Here is the script:

#!/usr/bin/env python3

# Import modules
import boto3
import time
import objectpath
import csv
import os
import sys
import json
import requests
from requests_kerberos import HTTPKerberosAuth
import codecs
from datetime import datetime
from os.path import basename
from subprocess import check_output,CalledProcessError,PIPE

BASE_URL = "https://wiki.us.cworld.company.com/rest/api/content"
VIEW_URL = "https://wiki.us.cworld.company.com/pages/viewpage.action?pageId="

def banner(message, border='-'):
line = border * len(message)
print(line)
print(message)
print(line)

def initialize(interactive, aws_account):
# Set the date
today = datetime.today()
today = today.strftime("%m-%d-%Y")
# Set source files
aws_env_list="../../source_files/aws_environments/aws_environments_all.txt"
output_dir = "../../output_files/aws_instance_list/csv/"
output_file = output_dir + 'aws-instance-master-list-' + aws_account + '-' + today +'.csv'
output_file_name = 'aws-instance-master-list-' + aws_account + '-' + today +'.csv'
return today, aws_env_list, output_file, output_file_name

def authenticate():
#auth = get_login()
auth = ('tdunphy', 'local4tl4nt1cNJ!')
auth = str(auth).replace('(','').replace('\'','').replace(',',':').replace(')','').replace(' ','')
kerberos_auth = HTTPKerberosAuth(mutual_authentication="DISABLED",principal=auth)
auth = kerberos_auth
return auth

## These are dummy AWS account numbers. I cannot post account number for my company.
def aws_accounts_to_account_numbers(aws_account):
switcher = {
'company-lab': '123456789101',
'company-bill': '123456789102',
'company-stage': '123456789103',
'company-dlab': '123456789103',
}
return switcher.get(aws_account, "nothing")


def list_instances(aws_account,aws_account_number, interactive):
today, aws_env_list, output_file, output_file_name = initialize(interactive, aws_account)
engagement = None
# Set the account
session = boto3.Session(profile_name=aws_account)
ec2 = session.client("ec2")
fieldnames = [ 'AWS Account', 'Account Number', 'Name', 'Instance ID', 'VPC ID', 'Type', 'Platform', 'State', 'Key Name', 'Private IP', 'Public IP', 'Private DNS', 'Volumes', 'Availability Zone', 'Launch Date', 'Engagement Code']
# Set the ec2 dictionary
ec2info = {}
public_ips_list = ''
private_ips_list = ''
private_dns = None
with open(output_file, mode='w+') as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=fieldnames, delimiter=',', lineterminator='\n')
writer.writeheader()
if 'gov' in aws_account and not 'admin' in aws_account:
print("This is a gov account.")
session = boto3.Session(profile_name=aws_account,region_name='us-gov-west-1')
else:
print("This is a commercial account.")
session = boto3.Session(profile_name=aws_account,region_name='us-east-1')

ec2 = session.client("ec2")
# Loop through the instances
instance_list = ec2.describe_instances()
for reservation in instance_list["Reservations"]:
for instance in reservation.get("Instances", []):
launch_time = instance["LaunchTime"]
launch_time_friendly = launch_time.strftime("%B %d %Y")
tree = objectpath.Tree(instance)
block_devices = set(tree.execute('$..BlockDeviceMappings[\'Ebs\'][\'VolumeId\']'))
if len(block_devices) == 0:
block_devices_list = None
else:
block_devices_list = list(block_devices)
block_devices_list = str(block_devices_list).replace('[','').replace(']','').replace('\'','').replace('{','').replace('}', '')
private_ips = set(tree.execute('$..PrivateIpAddress'))
if len(private_ips) == 0:
private_ips_list = None
else:
private_ips_list = list(private_ips)
private_ips_list = str(private_ips_list).replace('[','').replace(']','').replace('\'','')
public_ips = set(tree.execute('$..PublicIp'))
if len(public_ips) == 0:
public_ips_list = None
else:
public_ips_list = list(public_ips)
public_ips_list = str(public_ips_list).replace('[','').replace(']','').replace('\'','')
if 'KeyName' in instance:
key_name = instance['KeyName']
else:
key_name = None
name = None
if 'Tags' in instance:
try:
tags = instance['Tags']
name = None
for tag in tags:
if tag["Key"] == "Name":
name = tag["Value"]
for tag in tags:
if tag["Key"] == "Engagement":
engagement = tag["Value"]
else:
engagement = None
except ValueError:
print("Instance: %s has no tags" % instance_id)
if 'VpcId' in instance:
vpc_id = instance['VpcId']
else:
vpc_id = None
if 'PrivateDnsName' in instance:
private_dns = instance['PrivateDnsName']
else:
private_dns = None
if 'Platform' in instance:
platform = instance['Platform']
else:
platform = None
ec2info[instance['InstanceId']] = {
'AWS Account': aws_account,
'Account Number': aws_account_number,
'Name': name,
'Instance ID': instance['InstanceId'],
'VPC ID': vpc_id,
'Type': instance['InstanceType'],
'Platform': platform,
'State': instance['State']['Name'],
'Key Name': key_name,
'Private IP': private_ips_list,
'Public IP': public_ips_list,
'Private DNS': private_dns,
'Volumes': block_devices_list,
'Availability Zone': instance['Placement']['AvailabilityZone'],
'Launch Date': launch_time_friendly,
'Engagement Code': engagement
}
with open(output_file,'a') as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=fieldnames, delimiter=',', lineterminator='\n')
writer.writerow({'AWS Account': aws_account, "Account Number": aws_account_number, 'Name': name, 'Instance ID': instance["InstanceId"], 'VPC ID': vpc_id, 'Type': instance["InstanceType"], 'Platform': platform, 'State': instance["State"]["Name"], 'Key Name': key_name, 'Private IP': private_ips_list, 'Public IP': public_ips_list, 'Private DNS': private_dns, 'Volumes': block_devices, 'Availability Zone': instance['Placement']['AvailabilityZone'], 'Launch Date': launch_time_friendly, 'Engagement Code': engagement})
for instance_id, instance in ec2info.items():
print(Fore.RESET + "-------------------------------------")
for key in [
'AWS Account',
'Account Number',
'Name',
'Instance ID',
'VPC ID',
'Type',
'Platform',
'Key Name',
'State',
'Private IP',
'Public IP',
'Private DNS',
'Volumes',
'Availability Zone',
'Launch Date',
'Engagement Code'
]:
print(Fore.GREEN + "{0}: {1}".format(key, instance.get(key)))
time.sleep(2)
print(Fore.RESET + "-------------------------------------")
with open(output_file,'a') as csv_file:
csv_file.close()
return output_file


def convert_csv_to_html_table(output_file, today, interactive, aws_account):
output_dir = "../../output_files/aws_instance_list/html/"
if interactive == 1:
htmlfile = output_dir + 'aws-instance-master-list-' + aws_account + '-' + today +'.html'
htmlfile_name = 'aws-instance-master-list-' + aws_account + '-' + today +'.html'
else:
htmlfile = output_dir + 'aws-instance-master-list-' + today +'.html'
htmlfile_name = 'aws-instance-master-list-' + today +'.html'
count = 0
html = ''
with open(output_file,'r') as CSVFILE:
reader = csv.reader(CSVFILE)
with open(output_file,'r') as CSVFILE:
reader = csv.reader(CSVFILE)
html += "<table><tbody>"
for row in reader:
html += "<tr>"
# Process the headers
if count == 0:
for column in row:
html += "<th>%s</th>" % column
else:
# Process the data
for column in row:
html += "<td>%s</td>" % column
html += "</tr>"
count += 1
html += "</tbody></table>"
with open(htmlfile,'w+') as HTMLFILE:
HTMLFILE.write(html)
return htmlfile, htmlfile_name


def get_page_ancestors(auth, pageid):
# Get basic page information plus the ancestors property
url = '{base}/{pageid}?expand=ancestors'.format(
base = BASE_URL,
pageid = pageid)
r = requests.get(url, auth = auth)
r.raise_for_status()
return r.json()['ancestors']


def get_page_info(auth, pageid):
url = '{base}/{pageid}'.format(
base = BASE_URL,
pageid = pageid)
r = requests.get(url, auth = auth)
r.raise_for_status()
return r.json()


def write_data_to_confluence(auth, html, pageid, title = None):
info = get_page_info(auth, pageid)
ver = int(info['version']['number']) + 1
ancestors = get_page_ancestors(auth, pageid)
anc = ancestors[-1]
del anc['_links']
del anc['_expandable']
del anc['extensions']
if title is not None:
info['title'] = title
data = {
'id' : str(pageid),
'type' : 'page',
'title' : info['title'],
'version' : {'number' : ver},
'ancestors' : [anc],
'body' : {
'storage' :
{
'representation' : 'storage',
'value' : str(html)
}
}
}
data = json.dumps(data)
url = '{base}/{pageid}'.format(base = BASE_URL, pageid = pageid)
r = requests.put(
url,
data = data,
auth = auth,
headers = { 'Content-Type' : 'application/json' }
)
r.raise_for_status()
print("Wrote '%s' version %d" % (info['title'], ver))
print("URL: %s%d" % (VIEW_URL, pageid))

def main():
pageid = 138317098
title = 'AWS EC2 Instance List'
aws_account = input("Enter the name of the AWS account you'll be working in: ")
aws_account_number = aws_accounts_to_account_numbers(aws_account)
today, aws_env_list, output_file, output_file_name = initialize(interactive, aws_account)
output_file = list_instances(aws_account,aws_account_number, interactive)
htmlfile, htmlfile_name = convert_csv_to_html_table(output_file, today, interactive, aws_account)
with open(htmlfile, 'r', encoding='utf-8') as htmlfile:
html = htmlfile.read()
auth = authenticate()
write_data_to_confluence(auth, html, pageid, title)


if __name__ == "__main__":
main()


Why does this script fail to write to confluence only when it processes a lot of servers?

0 answers

Suggest an answer

Log in or Sign up to answer
TAGS
Community showcase
Published in Apps & Integrations

🍻🍂Apptoberfest Update: Upcoming Virtual Events 🎉

Hello Community! I hope you've been enjoying the 🍂Apptoberfestivities🍂 (I know I have!) The event is heating up next week with a series of virtual events that we're calling the 🍻🍂Partner App ...

114 views 1 8
Read article

Community Events

Connect with like-minded Atlassian users at free events near you!

Find an event

Connect with like-minded Atlassian users at free events near you!

Unfortunately there are no Community Events near you at the moment.

Host an event

You're one step closer to meeting fellow Atlassian users at your local event. Learn more about Community Events

Events near you