create_dataset.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
# This file was used to create the Hugging Face dataset from the exercism/python
# GitHub repo.
# Refer to: https://github.com/exercism/python/tree/main/exercises/practice
  4. import os
  5. from pathlib import Path
  6. from datasets import Dataset
  7. tests = sorted(os.listdir('practice/'))
  8. dataset = {
  9. 'instance_id': [],
  10. 'instance_name': [],
  11. 'instruction': [],
  12. 'signature': [],
  13. 'test': [],
  14. }
  15. for i, test in enumerate(tests):
  16. testdir = Path(f'practice/{test}/')
  17. dataset['instance_id'].append(i)
  18. dataset['instance_name'].append(testdir.name.replace('-', '_'))
  19. # if len(glob.glob(f'practice/{testdir.name}/*.py')) != 2:
  20. # print(testdir.name)
  21. instructions = ''
  22. introduction = testdir / '.docs/introduction.md'
  23. if introduction.exists():
  24. instructions += introduction.read_text()
  25. instructions += (testdir / '.docs/instructions.md').read_text()
  26. instructions_append = testdir / '.docs/instructions.append.md'
  27. if instructions_append.exists():
  28. instructions += instructions_append.read_text()
  29. dataset['instruction'].append(instructions)
  30. signature_file = testdir / (testdir.name + '.py').replace('-', '_')
  31. dataset['signature'].append(signature_file.read_text())
  32. test_file = testdir / (testdir.name + '_test.py').replace('-', '_')
  33. dataset['test'].append(test_file.read_text())
  34. ds = Dataset.from_dict(dataset)
  35. ds.push_to_hub('RajMaheshwari/Exercism-Python')