Get CSV user work log reports from Jira Cloud with Python

 

Hi Atlassian Community!

It's time to share another Python script.
This script automates
 the generation of work log reports from Jira. It is designed to assist teams in efficiently tracking time spent on projects and to provide flexible reporting options to suit different needs.

The solution

This script makes use of the Jira REST API to gather work log data across specified projects within a given timeframe. It then generates CSV reports based on user preferences, offering insights into work logs per user or/and per project.

Key Features

  • Dynamic Project Selection
    Input the number of projects and their keys to fetch work logs.

  • Customizable Timeframe
    Specify the number of days (up to 30 - customizable) for which you want to retrieve work logs.

  • Flexible Reporting Options: Choose from multiple report formats, including:

    1. All work logs per user

    2. Total time spent per user per project

    3. Total time spent per project

    4. Specific user work logs (using user's name)

    5. Total time spent by a specific user per project

    6. Comprehensive reports combining all options for all users

    7. Comprehensive reports combining all options for a specific user

Preparation

Python Environment: Install the necessary libraries by running:

pip install requests pandas

Prepare your Jira Cloud site URL (your-domain.atlassian.net), email address with admin rights, and API token. You can generate an API token from Atlassian's account security page.

The script


import requests
import time
from datetime import datetime, timedelta
from requests.auth import HTTPBasicAuth
from getpass import getpass
import pandas as pd
import os
import logging

# Log to the console with timestamps; INFO level surfaces per-project progress.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Interactive credential collection at import time. getpass keeps the API
# token out of terminal echo; email + token form the HTTP Basic auth pair.
jira_domain = input("Enter your Jira domain (e.g., your-domain.atlassian.net): ")
email = input("Enter your Jira email: ")
api_token = getpass("Enter your Jira API token: ")
auth = HTTPBasicAuth(email, api_token)

# Shared headers for every Jira REST request.
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json"
}

# Module-level run counters; incremented in generate_report() and reported
# in the final summary log lines.
total_projects_retrieved = 0
total_issues_retrieved = 0
total_worklogs_retrieved = 0

def get_project_keys():
    """Prompt until the user supplies at least one non-empty project key.

    Returns:
        list[str]: Project keys with surrounding whitespace stripped.
    """
    while True:
        raw = input("\nEnter project keys separated by commas (Ex. PROJ1,PROJ2,PROJ3): ")
        project_keys = [key.strip() for key in raw.split(',') if key.strip()]
        if project_keys:
            return project_keys
        # Nothing usable entered: re-prompt. (The original raised and caught a
        # ValueError purely as control flow; a plain conditional is clearer.)
        print("Please provide valid project keys.")

def get_issues_for_project(project_key, start_date):
    """Fetch all issues in *project_key* updated on/after *start_date*.

    Pages through the Jira search API 50 issues at a time. On any request
    failure the error is logged and the issues collected so far are returned.

    Args:
        project_key: Jira project key (e.g. "PROJ1").
        start_date: datetime.date lower bound for the JQL ``updated`` filter.

    Returns:
        list[dict]: Raw issue objects from the search API response.
    """
    issues = []
    start_at = 0
    max_results = 50
    # Quote the key so project keys that collide with JQL reserved words
    # still parse as a value rather than a keyword.
    jql = f'project = "{project_key}" AND updated >= {start_date.strftime("%Y-%m-%d")}'

    while True:
        try:
            url = f"https://{jira_domain}/rest/api/3/search"
            params = {"jql": jql, "startAt": start_at, "maxResults": max_results}
            response = requests.get(url, headers=headers, auth=auth, params=params)

            response.raise_for_status()
            data = response.json()
            page = data['issues']
            issues.extend(page)
            # Also stop on an empty page, so an inconsistent 'total' (e.g. if
            # issues are deleted while paginating) cannot loop forever.
            if not page or start_at + max_results >= data['total']:
                break
            start_at += max_results
            time.sleep(0.3)  # brief pause between pages to respect rate limits
        except requests.exceptions.RequestException as e:
            logging.error(f"Failed to fetch issues for project {project_key}. Error: {e}")
            break
    return issues

def get_worklogs(issue_key, start_date):
    """Fetch all worklogs for a given issue logged on/after *start_date*.

    Pages through the worklog endpoint with ``startAt`` so issues whose
    worklog count exceeds one page are fully retrieved (a single unpaginated
    request silently drops everything past the first page).

    Args:
        issue_key: Issue id or key accepted by the worklog REST endpoint.
        start_date: datetime.date; worklogs started earlier are skipped.

    Returns:
        list[dict]: Raw worklog objects whose 'started' date is on/after
        start_date.
    """
    worklogs = []
    start_at = 0
    url = f"https://{jira_domain}/rest/api/3/issue/{issue_key}/worklog"
    while True:
        try:
            response = requests.get(url, headers=headers, auth=auth,
                                    params={"startAt": start_at})

            response.raise_for_status()
            data = response.json()
            page = data['worklogs']
            for worklog in page:
                # 'started' looks like '2024-08-20T16:36:17.000+0000'
                log_date = datetime.strptime(worklog['started'], '%Y-%m-%dT%H:%M:%S.%f%z').date()
                if log_date >= start_date:
                    worklogs.append(worklog)
            start_at += len(page)
            time.sleep(0.3)  # pause after each call to respect rate limits
            # Stop when the reported total is reached or a page comes back empty.
            if not page or start_at >= data['total']:
                break
        except requests.exceptions.RequestException as e:
            logging.error(f"Failed to fetch worklogs for issue {issue_key}. Error: {e}")
            break
    return worklogs

def convert_to_minutes(time_string):
    """Convert a Jira duration string (e.g. "1w 2d 3h 30m") to total minutes.

    Units: w = 10080 min, d = 1440 min, h = 60 min, m = 1 min. An empty or
    whitespace-only string yields 0.

    Args:
        time_string: Space-separated components like "2h 15m".

    Returns:
        int: Total number of minutes.

    Raises:
        ValueError: If a component has an unrecognized unit suffix or a
            non-numeric value (the original raised a bare KeyError here).
    """
    time_units = {"w": 10080, "d": 1440, "h": 60, "m": 1}
    total_minutes = 0
    for part in time_string.split():
        unit = part[-1]
        if unit not in time_units:
            raise ValueError(f"Unrecognized time unit in {part!r}")
        total_minutes += int(part[:-1]) * time_units[unit]
    return total_minutes

def save_report(df, csv_file):
    """Write *df* to *csv_file* without the index column, logging the outcome."""
    try:
        df.to_csv(csv_file, index=False)
    except Exception as e:
        logging.error(f"Failed to save the report. Error: {e}")
    else:
        logging.info(f"Report generated and saved to {csv_file}")

def generate_user_report(df, specific_user=None):
    """Return rows of *df* whose 'User' column equals *specific_user*.

    When *specific_user* is None (or otherwise falsy) the frame is returned
    unchanged.
    """
    if not specific_user:
        return df
    return df[df['User'] == specific_user]

def generate_time_summary(df, group_by_columns):
    """Sum 'Time Spent (minutes)' over each group defined by *group_by_columns*."""
    minutes_per_group = df.groupby(group_by_columns)['Time Spent (minutes)'].sum()
    return minutes_per_group.reset_index()

def generate_reports(df):
    """Interactively generate one or more CSV reports from worklog data.

    Presents a menu of report types, writes each chosen report to a CSV file
    in the current directory, and finally prints the absolute path of every
    file created.

    Args:
        df: DataFrame with columns 'Project', 'User', 'Issue',
            'Time Spent (minutes)' and 'Date'.
    """
    # Choices in this set require a user name and their lambdas take (df, user).
    user_specific_choices = {4, 5, 7}
    report_choices = {
        1: lambda df: df,
        2: lambda df: generate_time_summary(df, ['Project', 'User']),
        3: lambda df: generate_time_summary(df, ['Project']),
        4: lambda df, user: generate_user_report(df, user),
        5: lambda df, user: generate_time_summary(generate_user_report(df, user), ['Project', 'User']),
        6: lambda df: pd.concat([df, generate_time_summary(df, ['Project', 'User']), generate_time_summary(df, ['Project'])]),
        7: lambda df, user: pd.concat([generate_user_report(df, user), generate_time_summary(generate_user_report(df, user), ['Project', 'User'])])
    }

    file_paths = []

    while True:
        print("\nChoose the type of report you want to generate:")
        print("1. All worklogs per user (all users)")
        print("2. Total time spent per user (all users) per project")
        print("3. Total time spent per project (includes all users)")
        print("4. All worklogs by a specific user")
        print("5. Total time spent by a specific user per project")
        print("6. All worklogs per user + Total time spent per user per project + Total time spent per project")
        print("7. All worklogs by a specific user + Total time spent by the user per project + Total time spent per project by the user")

        try:
            choice = int(input("\nEnter your choice (1-7): "))
            if choice not in report_choices:
                raise ValueError("Invalid choice.")

            specific_user = None
            if choice in user_specific_choices:
                specific_user = input("Enter the user name for the specific user report: ")

            report_func = report_choices[choice]
            # Dispatch on the choice itself, not on the truthiness of the
            # user name: previously an empty user name for choices 4/5/7
            # called a two-argument lambda with one argument, crashing the
            # loop with an uncaught TypeError.
            if choice in user_specific_choices:
                report_data = report_func(df, specific_user)
            else:
                report_data = report_func(df)
            csv_file = f"jira_report_choice_{choice}.csv"
            save_report(report_data, csv_file)
            file_paths.append(os.path.abspath(csv_file))

            another_report = input("\nDo you want to generate another report? (yes/no): ").strip().lower()
            if another_report != 'yes':
                break
        except ValueError as e:
            logging.error(f"{e}. Please try again.")

    if file_paths:
        print("\nThe following reports were created:")
        for path in file_paths:
            print(path)

def generate_report():
    """Main function to generate the worklog report.

    Collects project keys and a day range interactively, pulls issues and
    their worklogs through the REST API helpers, and hands the assembled
    rows to the interactive report menu. Updates the module-level counters
    used by the closing summary log lines.
    """
    global total_projects_retrieved, total_issues_retrieved, total_worklogs_retrieved

    project_keys = get_project_keys()

    # Keep prompting until a valid integer in [1, 30] is supplied.
    days = None
    while days is None:
        try:
            candidate = int(input("\nEnter how many days in the past to create the report for (1-30): "))
            if not 1 <= candidate <= 30:
                raise ValueError("Days must be between 1 and 30.")
            days = candidate
        except ValueError as e:
            logging.error(f"{e}. Please try again.")

    start_date = datetime.now().date() - timedelta(days=days)

    report_data = []
    for project_key in project_keys:
        logging.info(f"Processing project: {project_key}")
        issues = get_issues_for_project(project_key, start_date)
        total_projects_retrieved += 1
        total_issues_retrieved += len(issues)

        worklogs_retrieved = 0
        for issue in issues:
            issue_worklogs = get_worklogs(issue['id'], start_date)
            worklogs_retrieved += len(issue_worklogs)
            report_data.extend(
                {
                    "Project": project_key,
                    "User": entry['author']['displayName'],
                    "Issue": issue['key'],
                    "Time Spent (minutes)": convert_to_minutes(entry['timeSpent']),
                    "Date": entry['started'][:10],
                }
                for entry in issue_worklogs
            )
        total_worklogs_retrieved += worklogs_retrieved
        logging.info(f"Completed processing {len(issues)} issues and retrieved {worklogs_retrieved} worklogs for project {project_key}.")

    if report_data:
        generate_reports(pd.DataFrame(report_data))
    else:
        logging.info("\nNo worklog data available to generate a report.")

    logging.info(f"\nTotal projects processed: {total_projects_retrieved}")
    logging.info(f"Total issues processed: {total_issues_retrieved}")
    logging.info(f"Total worklogs retrieved: {total_worklogs_retrieved}")

# Script entry point: run the interactive report flow and surface any
# unhandled failure as a CRITICAL log record instead of a raw traceback.
if __name__ == "__main__":
    try:
        generate_report()
    except Exception as e:
        logging.critical(f"An unexpected error occurred: {e}")

Sneak peek
Zight 2024-08-20 at 4.36.17 PM.jpg.jpg

Considerations

  • API Rate Limits
    The script includes pauses to avoid hitting Jira's rate limits which could cause some slowness in the total execution time

  • Data Privacy
    Ensure that the API token is kept secure and not hardcoded into the script

  • Output Files
    The script generates CSV files in the current directory. Ensure write permissions are available

  • Report time unit
    The report will be generated with the work logs in minutes by default, you can change this to hours/seconds/days at your convenience

Disclaimer:

While this script is designed to facilitate certain interactions with JIRA Software Cloud as a convenience, it is essential to understand that its functionality is subject to change due to updates to JIRA Software Cloud’s API or other conditions that could affect its operation.

Please note that this script is provided on an "as is" and "as available" basis without any warranties of any kind. This script is not officially supported or endorsed by Atlassian, and its use is at your own discretion and risk.

Thanks to @Rustem Shiriiazdanov for the improvement suggestions!

Cheers!

3 comments

Rustem Shiriiazdanov August 20, 2024

Hi @Delfino Rosales 

Really cool script, thanks! I absolutely love the approach, and implementation!

I noticed one point to improve the performance. It looked like the script was gathering all issues for a specific project regardless of the time limit. I tested it on a big instance with a project including 2000+ issues, and it indeed started to gather all the issues.

So I changed the logic to include a date filter (AND updated >= {start_date.strftime('%Y-%m-%d')}), which only fetches issues that were updated within the user-specified timeframe.

It significantly increases the performance, and still keeps all the required tickets gathered (as entering the worklog updates the ticket -> we do not lose any issues when adding this condition. At least, I did not find any immediate scenarios where we may lose anything due to excessive filtering).

Few other things I'd suggest implementing (but that's purely IMHO, since the possible changes will depend on the specific user need, I guess):

1. I thought it may be an overcomplication to enter the number of projects and then iterate through them. So I replaced that logic with comma-separated user input for projects

2. There were some repeating lines of code I'd suggest to get rid of (see below the code), and some functions I'd personally suggest making smaller.

3. I also added logging of network requests. But that's only for debugging purposes, as I investigated what was happening with the report when data querying required way too much time

---

The updated script is below, I'd really appreciate your comments!

---

import requests
import time
from datetime import datetime, timedelta
from requests.auth import HTTPBasicAuth
from getpass import getpass
import pandas as pd
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# User input for Jira credentials and domain
jira_domain = input("Enter your Jira domain (e.g., your-domain.atlassian.net): ")
email = input("Enter your Jira email: ")
api_token = getpass("Enter your Jira API token: ")  # Secure input for API token
auth = HTTPBasicAuth(email, api_token)

headers = {
    "Accept": "application/json",
    "Content-Type": "application/json"
}

# Initialize counters
total_projects_retrieved = 0
total_issues_retrieved = 0
total_worklogs_retrieved = 0

def get_project_keys():
    """Get project keys from user input."""
    while True:
        try:
            project_keys = input("\nEnter project keys separated by commas: ").strip().split(',')
            project_keys = [key.strip() for key in project_keys if key.strip()]
            if project_keys:
                return project_keys
            else:
                raise ValueError
        except ValueError:
            print("Please provide valid project keys.")

def get_issues_for_project(project_key, start_date):
    """Fetch issues for a given project that were updated or created since start_date."""
    issues = []
    start_at = 0
    max_results = 50
    jql = f"project={project_key} AND updated >= {start_date.strftime('%Y-%m-%d')}"
   
    while True:
        try:
            url = f"https://{jira_domain}/rest/api/3/search"
            params = {"jql": jql, "startAt": start_at, "maxResults": max_results}
            logging.info(f"Sending request to {url} with params: {params}")
            response = requests.get(url, headers=headers, auth=auth, params=params)
            logging.info(f"Received response with status code {response.status_code} for project {project_key}")

            response.raise_for_status()
            data = response.json()
            issues.extend(data['issues'])
            if start_at + max_results >= data['total']:
                break
            start_at += max_results
            time.sleep(0.3)  # Pause to avoid rate limit
        except requests.exceptions.RequestException as e:
            logging.error(f"Failed to fetch issues for project {project_key}. Error: {e}")
            break
    return issues

def get_worklogs(issue_key, start_date):
    """Fetch worklogs for a given issue since start_date."""
    worklogs = []
    try:
        url = f"https://{jira_domain}/rest/api/3/issue/{issue_key}/worklog"
        logging.info(f"Sending request to {url}")
        response = requests.get(url, headers=headers, auth=auth)
        logging.info(f"Received response with status code {response.status_code} for issue {issue_key}")

        response.raise_for_status()
        data = response.json()
        for worklog in data['worklogs']:
            log_date = datetime.strptime(worklog['started'], '%Y-%m-%dT%H:%M:%S.%f%z').date()
            if log_date >= start_date:
                worklogs.append(worklog)
        time.sleep(0.3)  # Pause to avoid rate limit
    except requests.exceptions.RequestException as e:
        logging.error(f"Failed to fetch worklogs for issue {issue_key}. Error: {e}")
    return worklogs

def convert_to_minutes(time_string):
    """Convert Jira time format to total minutes."""
    time_units = {"w": 10080, "d": 1440, "h": 60, "m": 1}
    total_minutes = 0
    time_parts = time_string.split()
    for part in time_parts:
        unit = part[-1]
        value = int(part[:-1])
        total_minutes += value * time_units[unit]
    return total_minutes

def save_report(df, csv_file):
    """Save the dataframe to a CSV file."""
    try:
        df.to_csv(csv_file, index=False)
        logging.info(f"Report generated and saved to {csv_file}")
    except Exception as e:
        logging.error(f"Failed to save the report. Error: {e}")

def generate_user_report(df, specific_user=None):
    """Generate report data filtered by a specific user."""
    if specific_user:
        df = df[df['User'] == specific_user]
    return df

def generate_time_summary(df, group_by_columns):
    """Generate summary report grouped by the specified columns."""
    return df.groupby(group_by_columns)['Time Spent (minutes)'].sum().reset_index()

def generate_reports(df):
    """Handle the generation of different types of reports."""
    report_choices = {
        1: lambda df: df,
        2: lambda df: generate_time_summary(df, ['Project', 'User']),
        3: lambda df: generate_time_summary(df, ['Project']),
        4: lambda df, user: generate_user_report(df, user),
        5: lambda df, user: generate_time_summary(generate_user_report(df, user), ['Project', 'User']),
        6: lambda df: pd.concat([df, generate_time_summary(df, ['Project', 'User']), generate_time_summary(df, ['Project'])]),
        7: lambda df, user: pd.concat([generate_user_report(df, user), generate_time_summary(generate_user_report(df, user), ['Project', 'User'])])
    }
   
    file_paths = []

    while True:
        print("\nChoose the type of report you want to generate:")
        print("1. All worklogs per user (all users)")
        print("2. Total time spent per user (all users) per project")
        print("3. Total time spent per project (includes all users)")
        print("4. All worklogs by a specific user")
        print("5. Total time spent by a specific user per project")
        print("6. All worklogs per user + Total time spent per user per project + Total time spent per project")
        print("7. All worklogs by a specific user + Total time spent by the user per project + Total time spent per project by the user")

        try:
            choice = int(input("\nEnter your choice (1-7): "))
            if choice not in report_choices:
                raise ValueError("Invalid choice.")
           
            specific_user = None
            if choice in [4, 5, 7]:
                specific_user = input("Enter the user name for the specific user report: ")

            report_func = report_choices[choice]
            report_data = report_func(df, specific_user) if specific_user else report_func(df)
            csv_file = f"jira_report_choice_{choice}.csv"
            save_report(report_data, csv_file)
            file_paths.append(os.path.abspath(csv_file))

            another_report = input("\nDo you want to generate another report? (yes/no): ").strip().lower()
            if another_report != 'yes':
                break
        except ValueError as e:
            logging.error(f"{e}. Please try again.")

    if file_paths:
        print("\nThe following reports were created:")
        for path in file_paths:
            print(path)

def generate_report():
    """Main function to generate the worklog report."""
    global total_projects_retrieved, total_issues_retrieved, total_worklogs_retrieved

    project_keys = get_project_keys()
    while True:
        try:
            days = int(input("\nEnter how many days in the past to create the report for (1-30): "))
            if 1 <= days <= 30:
                break
            else:
                raise ValueError("Days must be between 1 and 30.")
        except ValueError as e:
            logging.error(f"{e}. Please try again.")
   
    start_date = datetime.now().date() - timedelta(days=days)

    report_data = []
    for project_key in project_keys:
        logging.info(f"Processing project: {project_key}")
        issues = get_issues_for_project(project_key, start_date)
        total_projects_retrieved += 1
        total_issues_retrieved += len(issues)
        worklogs_retrieved = 0
        for issue in issues:
            worklogs = get_worklogs(issue['id'], start_date)
            worklogs_retrieved += len(worklogs)
            for worklog in worklogs:
                time_spent_minutes = convert_to_minutes(worklog['timeSpent'])
                report_data.append({
                    "Project": project_key,
                    "User": worklog['author']['displayName'],
                    "Issue": issue['key'],
                    "Time Spent (minutes)": time_spent_minutes,
                    "Date": worklog['started'][:10]
                })
        total_worklogs_retrieved += worklogs_retrieved
        logging.info(f"Completed processing {len(issues)} issues and retrieved {worklogs_retrieved} worklogs for project {project_key}.")

    if report_data:
        df = pd.DataFrame(report_data)
        generate_reports(df)
    else:
        logging.info("\nNo worklog data available to generate a report.")
   
    logging.info(f"\nTotal projects processed: {total_projects_retrieved}")
    logging.info(f"Total issues processed: {total_issues_retrieved}")
    logging.info(f"Total worklogs retrieved: {total_worklogs_retrieved}")

if __name__ == "__main__":
    try:
        generate_report()
    except Exception as e:
        logging.critical(f"An unexpected error occurred: {e}")

Like Delfino Rosales likes this
Delfino Rosales
Atlassian Team
Atlassian Team members are employees working across the company in a wide variety of roles.
August 20, 2024

@Rustem Shiriiazdanov Thanks for checking the script and sharing the improvements!

Your suggestions make total sense, and I've updated the script originally shared with your approach and some small tweaks to reduce detailed logging and keep the user feedback clean.

Thanks again!

Like Rustem Shiriiazdanov likes this
Rustem Shiriiazdanov August 20, 2024

Thanks @Delfino Rosales! I hope people from the Community will find the script useful. I do really enjoy working with JIRA API using python and pandas, and I think more people should try it out xD

Thanks for promoting it!

Comment

Log in or Sign up to comment
TAGS
AUG Leaders

Atlassian Community Events