ai
/
OpenHands


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
# This script was used to create the Hugging Face dataset from the
# exercism/python GitHub repo.
# Refer to: https://github.com/exercism/python/tree/main/exercises/practice

import os
from pathlib import Path

from datasets import Dataset

# Build one dataset row per exercise found under ./practice (path is relative
# to the current working directory) and upload the result to the Hub.
practice_root = Path('practice')

# Only sub-directories are exercises; stray plain files (e.g. .DS_Store) are
# skipped because the loop below would fail on their missing .docs/ folder.
# Sorting keeps the instance ids deterministic across runs.
tests = sorted(
    name for name in os.listdir(practice_root) if (practice_root / name).is_dir()
)

# Column-oriented layout: each key maps to a list with one entry per exercise.
dataset = {
    'instance_id': [],
    'instance_name': [],
    'instruction': [],
    'signature': [],
    'test': [],
}

for i, test in enumerate(tests):
    testdir = practice_root / test
    docs = testdir / '.docs'

    # Directory names may contain '-', while the instance name and the
    # Python file names inside the directory use '_' instead.
    module_name = testdir.name.replace('-', '_')

    # The instruction text is the concatenation, in this order, of
    # introduction.md (if present), instructions.md (always read) and
    # instructions.append.md (if present).  The files are read explicitly as
    # UTF-8 so the result does not depend on the platform's default encoding.
    instructions = ''
    introduction = docs / 'introduction.md'
    if introduction.exists():
        instructions += introduction.read_text(encoding='utf-8')
    instructions += (docs / 'instructions.md').read_text(encoding='utf-8')
    instructions_append = docs / 'instructions.append.md'
    if instructions_append.exists():
        instructions += instructions_append.read_text(encoding='utf-8')

    # <name>.py is stored as the 'signature' column and <name>_test.py as
    # the 'test' column.
    signature = (testdir / f'{module_name}.py').read_text(encoding='utf-8')
    test_code = (testdir / f'{module_name}_test.py').read_text(encoding='utf-8')

    # Append to every column only after all reads succeeded, so the columns
    # always have equal length.
    dataset['instance_id'].append(i)
    dataset['instance_name'].append(module_name)
    dataset['instruction'].append(instructions)
    dataset['signature'].append(signature)
    dataset['test'].append(test_code)

ds = Dataset.from_dict(dataset)

ds.push_to_hub('RajMaheshwari/Exercism-Python')