Skip to content

Commit eeadb4e

Browse files
committed
Merge branch 'main' of https://github.com/ServiceNow/workarena into js_init
2 parents a1cf71f + a772230 commit eeadb4e

26 files changed

Lines changed: 1461 additions & 207 deletions
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
name: Monitor the pool of WorkArena instances
2+
3+
on:
4+
workflow_dispatch:
5+
schedule:
6+
- cron: "0 3 * * *" # daily at 03:00 UTC
7+
8+
jobs:
9+
10+
test-l1-tasks:
11+
name: Test L1 tasks
12+
runs-on: ubuntu-22.04
13+
14+
defaults:
15+
run:
16+
shell: bash -l {0}
17+
18+
env:
19+
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
20+
21+
steps:
22+
- name: Checkout Repository
23+
uses: actions/checkout@v4
24+
25+
- name: Set up Python
26+
uses: actions/setup-python@v5
27+
with:
28+
python-version: '3.12'
29+
cache: 'pip'
30+
31+
- name: Install Python dependencies
32+
working-directory: ./dev
33+
run: |
34+
pip install -r requirements.txt
35+
pip install huggingface_hub
36+
37+
- name: Pip list
38+
run: pip list
39+
40+
- name: Install Playwright
41+
run: playwright install chromium --with-deps
42+
43+
- name: Run L1 tests
44+
run: pytest -n 20 --durations=10 --slowmo 1000 -v tests/test_task_general.py
45+
46+
47+
test-snow-instance:
48+
name: Test snow instance
49+
runs-on: ubuntu-22.04
50+
needs: test-l1-tasks # remove this line if you want both jobs to run in parallel
51+
52+
defaults:
53+
run:
54+
shell: bash -l {0}
55+
56+
env:
57+
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
58+
59+
steps:
60+
- name: Checkout Repository
61+
uses: actions/checkout@v4
62+
63+
- name: Set up Python
64+
uses: actions/setup-python@v5
65+
with:
66+
python-version: '3.12'
67+
cache: 'pip'
68+
69+
- name: Install Python dependencies
70+
working-directory: ./dev
71+
run: |
72+
pip install -r requirements.txt
73+
pip install huggingface_hub
74+
75+
- name: Pip list
76+
run: pip list
77+
78+
- name: Install Playwright
79+
run: playwright install chromium --with-deps
80+
81+
- name: Run snow instance tests
82+
run: pytest -n 20 --durations=10 --slowmo 1000 -v tests/test_snow_instance.py
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
name: Pool Usage Telemetry
2+
3+
on:
4+
schedule:
5+
# Run daily at 00:00 UTC
6+
- cron: '0 0 * * *'
7+
workflow_dispatch: # Allow manual trigger
8+
9+
jobs:
10+
telemetry:
11+
runs-on: ubuntu-latest
12+
13+
defaults:
14+
run:
15+
shell: bash -l {0}
16+
17+
env:
18+
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
19+
20+
steps:
21+
- name: Checkout repository
22+
uses: actions/checkout@v4
23+
24+
- name: Set up Python
25+
uses: actions/setup-python@v5
26+
with:
27+
python-version: '3.12'
28+
cache: 'pip'
29+
30+
- name: Install Python dependencies
31+
working-directory: ./dev
32+
run: |
33+
pip install -r requirements.txt
34+
pip install huggingface_hub
35+
pip install "wandb>=0.16"  # quoted: unquoted, bash reads ">=0.16" as a redirect to a file named "=0.16" and the version bound is lost
36+
37+
- name: Run telemetry script
38+
env:
39+
WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
40+
run: |
41+
python monitor_pool_usage.py

.github/workflows/unit_tests.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
run: black . --check
3737

3838
browsergym-workarena-fast:
39-
runs-on: ubuntu-latest
39+
runs-on: ubuntu-22.04
4040

4141
defaults:
4242
run:
@@ -50,7 +50,7 @@ jobs:
5050
- name: Set up Python
5151
uses: actions/setup-python@v5
5252
with:
53-
python-version: '3.10'
53+
python-version: '3.12'
5454
cache: 'pip' # caching pip dependencies
5555

5656
- name: Pip install
@@ -59,9 +59,9 @@ jobs:
5959

6060
- name: Pip list
6161
run: pip list
62-
62+
6363
- name: Install Playwright
64-
run: playwright install --with-deps
64+
run: playwright install chromium --with-deps
6565

6666
- name: Run non-slow browsergym-workarena Unit Tests
6767
env:

CITATION.cff

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
cff-version: 1.2.0
2+
message: "If you use WorkArena in your research, please cite both of the following papers."
3+
title: "WorkArena"
4+
date-released: 2024-03-12
5+
url: "https://github.com/ServiceNow/workarena"
6+
license: "Apache-2.0"
7+
8+
authors:
9+
- family-names: Drouin
10+
given-names: Alexandre
11+
- family-names: Gasse
12+
given-names: Maxime
13+
- family-names: Caccia
14+
given-names: Massimo
15+
- family-names: Laradji
16+
given-names: Issam H.
17+
- family-names: Del Verme
18+
given-names: Manuel
19+
- family-names: Marty
20+
given-names: Tom
21+
- family-names: Vazquez
22+
given-names: David
23+
- family-names: Chapados
24+
given-names: Nicolas
25+
- family-names: Lacoste
26+
given-names: Alexandre
27+
28+
preferred-citation:  # CFF 1.2.0: exactly one reference mapping (not a list); the second paper is listed under "references"
29+
type: conference-paper  # CFF reference type; "inproceedings" is the BibTeX name and is not in the CFF enum
30+
title: "WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks?"
31+
authors:
32+
- family-names: Drouin
33+
given-names: Alexandre
34+
- family-names: Gasse
35+
given-names: Maxime
36+
- family-names: Caccia
37+
given-names: Massimo
38+
- family-names: Laradji
39+
given-names: Issam H.
40+
- family-names: Del Verme
41+
given-names: Manuel
42+
- family-names: Marty
43+
given-names: Tom
44+
- family-names: Vazquez
45+
given-names: David
46+
- family-names: Chapados
47+
given-names: Nicolas
48+
- family-names: Lacoste
49+
given-names: Alexandre
50+
collection-title: "Proceedings of the 41st International Conference on Machine Learning (ICML)"  # CFF key for the proceedings title (BibTeX "booktitle")
51+
volume: 235  # volume of the "Proceedings of Machine Learning Research" series; CFF has no "series" key
52+
start: 11642  # first page; CFF "pages" is a page count, so the range is given as start/end
53+
end: 11662  # last page
54+
year: 2024
55+
url: "https://proceedings.mlr.press/v235/drouin24a.html"
56+
references:  # additional works to cite; a list of reference mappings
57+
- type: conference-paper
58+
title: "WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks"
59+
authors:
60+
- family-names: Boisvert
61+
given-names: Léo
62+
- family-names: Thakkar
63+
given-names: Megh
64+
- family-names: Gasse
65+
given-names: Maxime
66+
- family-names: Caccia
67+
given-names: Massimo
68+
- family-names: Le Sellier De Chezelles
69+
given-names: Thibault
70+
- family-names: Cappart
71+
given-names: Quentin
72+
- family-names: Chapados
73+
given-names: Nicolas
74+
- family-names: Lacoste
75+
given-names: Alexandre
76+
- family-names: Drouin
77+
given-names: Alexandre
78+
collection-title: "Advances in Neural Information Processing Systems 37 (NeurIPS 2024), Datasets & Benchmarks Track"
79+
year: 2024
80+
url: "https://proceedings.neurips.cc/paper_files/paper/2024/hash/0b82662b6c32e887bb252a74d8cb2d5e-Paper-Datasets_and_Benchmarks_Track.pdf"
81+
doi: "10.52202/079017-0195"

README.md

Lines changed: 8 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,6 @@
55
# WorkArena: A Benchmark for Evaluating Agents on Knowledge Work Tasks
66
[[Benchmark Contents]](#benchmark-contents)[[Getting Started]](#getting-started)[[Live Demo]](#live-demo)[[BrowserGym]](https://github.com/ServiceNow/BrowserGym)[[Citing This Work]](#citing-this-work)[Join us on Discord!](https://discord.gg/rDkP69X7)
77

8-
## Join Our Discord Community
9-
10-
Want to brainstorm ideas, troubleshoot issues, or just geek out with fellow agent builders? Our official Discord server is the perfect place to connect and collaborate. Come hang out with us to:
11-
12-
- Exchange tips, tricks, and success stories
13-
- Get real-time support and feedback
14-
- Stay updated on the latest features and announcements
15-
16-
[Join us on Discord!](https://discord.gg/rDkP69X7)
17-
188
---
199

2010
### Explore the BrowserGym Ecosystem
@@ -41,39 +31,27 @@ https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c
4131

4232
## Getting Started
4333

44-
To setup WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
34+
To set up WorkArena, you will need to gain access to ServiceNow instances and install our Python package locally. Follow the steps below to achieve this.
4535

46-
### a) Create a ServiceNow Developer Instance
36+
### a) Gain Access to ServiceNow Instances
4737

48-
1. Go to https://developer.servicenow.com/ and create an account.
49-
2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
50-
3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
51-
4. Change the role of the user to admin in yoyr instance parameters ![image](https://github.com/user-attachments/assets/6f0fbf8e-f40f-411a-84cb-fead93d85f60)
38+
1. Navigate to https://huggingface.co/datasets/ServiceNow/WorkArena-Instances.
39+
2. Fill out the form and accept the terms to request access to the gated repository, then wait for approval.
40+
3. Ensure that the machine where you will run WorkArena is [authenticated with Hugging Face](https://huggingface.co/docs/hub/en/datasets-polars-auth) (e.g., via `huggingface-cli login` or the `HUGGING_FACE_HUB_TOKEN` environment variable).
41+
4. Unset any previous WorkArena environment variables if you are upgrading from a previous install (`SNOW_INSTANCE_URL`, etc.)
5242

53-
5. You should now see your URL and credentials. Based on this information, set the following environment variables:
54-
* `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
55-
* `SNOW_INSTANCE_UNAME`: The username, should be "admin"
56-
* `SNOW_INSTANCE_PWD`: The password, make sure you place the value in quotes "" and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
57-
6. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
58-
59-
**Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
60-
61-
### b) Install WorkArena and Initialize your Instance
43+
### b) Install WorkArena
6244

6345
Run the following command to install WorkArena in the [BrowserGym](https://github.com/servicenow/browsergym) environment:
6446
```
65-
pip install browsergym
47+
pip install browsergym-workarena
6648
```
6749

6850
Then, install [Playwright](https://github.com/microsoft/playwright):
6951
```
7052
playwright install
7153
```
7254

73-
Finally, run this command in a terminal to upload the benchmark data to your ServiceNow instance:
74-
```
75-
workarena-install
76-
```
7755
Your installation is now complete! 🎉
7856

7957

0 commit comments

Comments
 (0)